synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Compatibility wrapper for the mini-SWE task app."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fastapi.exceptions import RequestValidationError
|
|
9
|
+
from fastapi.responses import JSONResponse
|
|
10
|
+
from starlette.requests import Request
|
|
11
|
+
from synth_ai.task.apps import ModalDeploymentConfig, registry
|
|
12
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
13
|
+
from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
14
|
+
|
|
15
|
+
from .grpo_swe_mini import build_config
|
|
16
|
+
|
|
17
|
+
# Identifier used below to look this task app up in the task-app registry.
APP_ID = "swe-mini"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _build_base_config() -> TaskAppConfig:
    """Return the task app configuration produced by ``build_config``."""
    config = build_config()
    return config
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Start from "nothing registered" defaults, then overwrite them when the
# registry lookup for APP_ID succeeds.
MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
ENV_FILES: tuple[str, ...] = ()
try:
    _REGISTERED_ENTRY = registry.get(APP_ID)
except Exception:  # pragma: no cover - registry unavailable in some contexts
    # Lookup failed: keep the defaults declared above.
    pass
else:
    MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
    ENV_FILES = tuple(_REGISTERED_ENTRY.env_files)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_task_app_config() -> TaskAppConfig:
    """Return a clone of the base task app configuration.

    The base configuration comes from ``_build_base_config()``; the result of
    its ``clone()`` method is what callers receive.
    """
    return _build_base_config().clone()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def fastapi_app():
    """Create the mini-SWE FastAPI app with auth-aware health checks.

    The app is built by ``create_task_app`` from ``build_task_app_config()``.
    Any GET routes already registered at ``/health`` and ``/health/rollout``
    are removed and replaced by handlers that:

    * answer 503 when ``normalize_environment_api_key()`` yields no key,
    * answer 200 with ``"authorized": False`` (plus a key-prefix hint) when
      ``is_api_key_header_authorized`` rejects the request,
    * answer with the endpoint's success payload otherwise.

    A ``RequestValidationError`` handler is installed as well; it prints a
    small snapshot of the request and answers 422 with at most five errors.

    Returns:
        The configured application object produced by ``create_task_app``.
    """
    # Imported locally so this function does not depend on an extra
    # module-level import.
    from fastapi.encoders import jsonable_encoder

    app = create_task_app(build_task_app_config())

    # Remove the existing GET health routes so the handlers registered below
    # are the ones that get served.
    overridden_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in overridden_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Print and return the first half (at least one char) of ``env_key``."""
        if not env_key:
            return None
        prefix = env_key[: max(1, len(env_key) // 2)]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_reply(source: str, request: Request, ok_body: dict):
        """Shared body of both health endpoints; ``ok_body`` is the success payload."""
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if not is_api_key_header_authorized(request):
            # NOTE(review): this returns the first half of the secret key to an
            # unauthorized caller (and prints it to stdout). Kept unchanged for
            # compatibility with existing clients; confirm that this debugging
            # aid is acceptable for deployed instances.
            prefix = _log_env_key_prefix(source, env_key)
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return ok_body

    @app.get("/health")
    async def health(request: Request):
        return _health_reply("health", request, {"status": "healthy", "authorized": True})

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        return _health_reply("health/rollout", request, {"ok": True, "authorized": True})

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Validation errors may carry non-JSON values (e.g. exception objects
        # in ``ctx``); encode them first so building the 422 response cannot
        # itself fail during serialization.
        errors = jsonable_encoder(exc.errors()[:5])
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": errors,
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            # Best-effort diagnostics only: never let logging break the response.
            pass
        return JSONResponse(
            status_code=422,
            content={"status": "invalid", "detail": errors},
        )

    return app
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
if __name__ == "__main__":
    # Command-line entry point: parse options, collect .env files, start the app.
    cli = argparse.ArgumentParser(description="Run the mini-SWE task app locally")
    cli.add_argument("--host", default="0.0.0.0")
    cli.add_argument("--port", type=int, default=8020)
    cli.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
    cli.add_argument(
        "--env-file",
        action="append",
        default=[],
        help="Additional .env files to load before startup",
    )
    options = cli.parse_args()

    # The backend dev env file, when present, is loaded ahead of any
    # --env-file arguments given on the command line.
    dev_env_path = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
    env_files = []
    if dev_env_path.exists():
        env_files.append(str(dev_env_path))
    env_files += options.env_file or []

    run_task_app(
        build_task_app_config,
        host=options.host,
        port=options.port,
        reload=options.reload,
        env_files=env_files,
    )
|
|
136
|
+
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# GRPO Synth Envs Hosted Service
|
|
2
|
+
|
|
3
|
+
This service provides hosted environment and policy management for GRPO (Group Relative Policy Optimization) training with synthetic environments.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
The service implements a FastAPI-based HTTP API that manages:
|
|
8
|
+
- **Environments**: Stateful environment instances (currently Crafter)
|
|
9
|
+
- **Policies**: Thin policy clients that prepare inference requests
|
|
10
|
+
- **Rollouts**: Coordinated execution of environment-policy interaction loops
|
|
11
|
+
- **Snapshots**: State persistence using Modal Volumes
|
|
12
|
+
- **Branching**: Creating multiple copies of environments/policies for exploration
|
|
13
|
+
|
|
14
|
+
## Key Components
|
|
15
|
+
|
|
16
|
+
### Core Modules
|
|
17
|
+
- `hosted_app.py`: FastAPI app factory and configuration
|
|
18
|
+
- `registry.py`: In-memory registries for active instances
|
|
19
|
+
- `storage/volume.py`: Modal Volume operations for snapshots
|
|
20
|
+
- `inference/openai_client.py`: OpenAI-compatible inference client
|
|
21
|
+
|
|
22
|
+
### API Routers
|
|
23
|
+
- `environment_routes.py`: Environment lifecycle endpoints
|
|
24
|
+
- `policy_routes.py`: Policy lifecycle endpoints
|
|
25
|
+
- `rollout.py`: Rollout coordinator and run management
|
|
26
|
+
- `branching.py`: Branching operations
|
|
27
|
+
|
|
28
|
+
### Environment Implementations
|
|
29
|
+
- `envs/crafter/`: Crafter environment and policy implementations
|
|
30
|
+
|
|
31
|
+
## API Endpoints
|
|
32
|
+
|
|
33
|
+
### Service Discovery
|
|
34
|
+
- `GET /info`: Service configuration and endpoints
|
|
35
|
+
- `GET /health`: Health check
|
|
36
|
+
|
|
37
|
+
### Environment Management
|
|
38
|
+
- `POST /env/create`: Create new environment
|
|
39
|
+
- `POST /env/reset`: Reset environment
|
|
40
|
+
- `POST /env/step`: Execute environment step
|
|
41
|
+
- `POST /env/snapshot`: Save environment state
|
|
42
|
+
- `POST /env/restore`: Restore from snapshot
|
|
43
|
+
- `POST /env/terminate`: Clean up environment
|
|
44
|
+
|
|
45
|
+
### Policy Management
|
|
46
|
+
- `POST /policy/create`: Create new policy
|
|
47
|
+
- `POST /policy/step`: Generate actions (with optional inference)
|
|
48
|
+
- `POST /policy/snapshot`: Save policy state
|
|
49
|
+
- `POST /policy/restore`: Restore from snapshot
|
|
50
|
+
- `POST /policy/terminate`: Clean up policy
|
|
51
|
+
|
|
52
|
+
### Coordination
|
|
53
|
+
- `POST /rollout`: Execute coordinated rollout
|
|
54
|
+
- `POST /branch`: Create environment/policy branches
|
|
55
|
+
- `POST /run/abort`: Abort running rollout
|
|
56
|
+
- `GET /run/status/{run_id}`: Check run status
|
|
57
|
+
|
|
58
|
+
## Local Development
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Install dependencies
|
|
62
|
+
pip install fastapi uvicorn httpx pydantic
|
|
63
|
+
|
|
64
|
+
# Run the service
|
|
65
|
+
python main.py
|
|
66
|
+
|
|
67
|
+
# Or with uvicorn directly
|
|
68
|
+
uvicorn main:app --reload --port 8000
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Modal Deployment
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Deploy to Modal
|
|
75
|
+
modal deploy main.py
|
|
76
|
+
|
|
77
|
+
# Run once
|
|
78
|
+
modal run main.py
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Environment Variables
|
|
82
|
+
|
|
83
|
+
- `SERVICE_BASE_URL`: Base URL for this service (default: http://localhost:8000)
|
|
84
|
+
- `VLLM_BASE_URL`: Base URL for vLLM inference service (default: http://localhost:8001)
|
|
85
|
+
- `DEFAULT_MODEL`: Default model name for inference
|
|
86
|
+
|
|
87
|
+
## Storage
|
|
88
|
+
|
|
89
|
+
The service uses Modal Volumes for persistent storage:
|
|
90
|
+
- Volume name: `synth-env-state`
|
|
91
|
+
- Mount path: `/data/state`
|
|
92
|
+
- Layout: `/data/state/runs/{rl_run_id}/{kind}/{shard}/{snapshot_id}.tar.gz`
|
|
93
|
+
|
|
94
|
+
## Example Usage
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
import httpx
|
|
98
|
+
|
|
99
|
+
# Create environment
|
|
100
|
+
env_response = httpx.post(
|
|
101
|
+
"http://localhost:8000/env/create",
|
|
102
|
+
json={
|
|
103
|
+
"env_name": "crafter",
|
|
104
|
+
"config": {},
|
|
105
|
+
"seed": 42,
|
|
106
|
+
"rl_run_id": "test-run-1"
|
|
107
|
+
}
|
|
108
|
+
)
|
|
109
|
+
env_id = env_response.json()["env_id"]
|
|
110
|
+
|
|
111
|
+
# Create policy
|
|
112
|
+
policy_response = httpx.post(
|
|
113
|
+
"http://localhost:8000/policy/create",
|
|
114
|
+
json={
|
|
115
|
+
"policy_name": "crafter-react",
|
|
116
|
+
"config": {"inference_url": "http://vllm:8001"},
|
|
117
|
+
"rl_run_id": "test-run-1",
|
|
118
|
+
"bound_env_id": env_id
|
|
119
|
+
}
|
|
120
|
+
)
|
|
121
|
+
policy_id = policy_response.json()["policy_id"]
|
|
122
|
+
|
|
123
|
+
# Execute rollout
|
|
124
|
+
rollout_response = httpx.post(
|
|
125
|
+
"http://localhost:8000/rollout",
|
|
126
|
+
json={
|
|
127
|
+
"run_id": "test-run-1",
|
|
128
|
+
"env": {"env_id": env_id},
|
|
129
|
+
"policy": {"policy_id": policy_id},
|
|
130
|
+
"ops": ["agent", "env"] * 10,
|
|
131
|
+
"on_done": "reset"
|
|
132
|
+
}
|
|
133
|
+
)
|
|
134
|
+
trajectories = rollout_response.json()["trajectories"]
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Testing
|
|
138
|
+
|
|
139
|
+
The implementation follows the plan outlined in `plan.md` and decisions in `decisions.md`. Key test areas:
|
|
140
|
+
- Environment create/step/reset lifecycle
|
|
141
|
+
- Policy inference request building
|
|
142
|
+
- Snapshot/restore round trips
|
|
143
|
+
- Rollout coordination with abort support
|
|
144
|
+
- Branching operations
|
|
145
|
+
|
|
146
|
+
4b
|
|
147
|
+
"aggregate": {
|
|
148
|
+
"completed": 20,
|
|
149
|
+
"total": 20,
|
|
150
|
+
"avg_turns": 10.0,
|
|
151
|
+
"avg_achievements": 1.3,
|
|
152
|
+
"achievements_freq": {
|
|
153
|
+
"collect_wood": 9,
|
|
154
|
+
"collect_sapling": 8,
|
|
155
|
+
"collect_drink": 7,
|
|
156
|
+
"place_plant": 2
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
groq qwen/qwen3-32b
|
|
162
|
+
],
|
|
163
|
+
"aggregate": {
|
|
164
|
+
"completed": 20,
|
|
165
|
+
"total": 20,
|
|
166
|
+
"avg_turns": 10.0,
|
|
167
|
+
"avg_achievements": 1.0,
|
|
168
|
+
"achievements_freq": {
|
|
169
|
+
"collect_sapling": 7,
|
|
170
|
+
"collect_wood": 9,
|
|
171
|
+
"collect_drink": 4
|
|
172
|
+
}
|
|
173
|
+
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, HTTPException
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from .registry import registry
|
|
9
|
+
|
|
10
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
# Router on which the POST /branch endpoint below is registered.
router = APIRouter()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BranchRequest(BaseModel):
    """Request body accepted by ``POST /branch``."""

    # Environments to branch; when None or empty, no environments are branched.
    env_ids: list[str] | None = None

    # Policies to branch.
    # NOTE(review): presumably handled like env_ids; confirm against the handler.
    policy_ids: list[str] | None = None

    # Number of children created for each parent that is found.
    num_children: int = 1

    # Caller-supplied cap: a request whose num_children exceeds it gets a 422.
    max_branches: int = 10
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BranchResponse(BaseModel):
    """Response body returned by ``POST /branch``."""

    # NOTE(review): presumably maps each parent env id to its child env ids; confirm.
    env_branches: dict[str, list[str]]

    # NOTE(review): presumably maps each parent policy id to its child policy ids; confirm.
    policy_branches: dict[str, list[str]]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@router.post("/branch", response_model=BranchResponse)
async def create_branches(request: BranchRequest) -> BranchResponse:
    """Create branches of environments and/or policies.

    Every parent that is found in the registry is snapshotted and restored
    ``request.num_children`` times; each restore yields one child. Parents
    that are not found are logged and skipped.

    Args:
        request: Parent environment/policy IDs plus the number of children to
            create per parent and the allowed maximum.

    Returns:
        A ``BranchResponse`` mapping each branched parent ID to the list of
        child IDs created from it.

    Raises:
        HTTPException: 422 when ``num_children`` exceeds ``max_branches``.
            An ``HTTPException`` raised by the snapshot/restore handlers is
            propagated unchanged. Any other failure is reported as a 500
            whose detail is the exception text.
    """
    if request.num_children > request.max_branches:
        raise HTTPException(
            status_code=422,
            detail=f"num_children ({request.num_children}) exceeds max_branches ({request.max_branches})",
        )

    env_branches: dict[str, list[str]] = {}
    policy_branches: dict[str, list[str]] = {}

    try:
        # Branch environments
        if request.env_ids:
            # Imported lazily (as before), but once per request rather than
            # once per child. Presumably lazy to avoid a circular import
            # between the route modules -- TODO confirm.
            from .environment_routes import (
                EnvRestoreRequest,
                EnvSnapshotRequest,
                restore_environment,
                snapshot_environment,
            )

            for env_id in request.env_ids:
                env_handle = registry.get_env(env_id)
                if not env_handle:
                    logger.warning(f"Environment {env_id} not found, skipping")
                    continue

                child_ids: list[str] = []

                for child_idx in range(request.num_children):
                    # Create snapshot of parent
                    snapshot_response = await snapshot_environment(
                        EnvSnapshotRequest(env_id=env_id)
                    )

                    # Restore the snapshot into a new environment
                    restore_response = await restore_environment(
                        EnvRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )

                    child_id = restore_response.env_id
                    child_handle = registry.get_env(child_id)

                    # Offset the child's seed by its (1-based) index so that
                    # siblings get distinct, reproducible seeds.
                    if child_handle and child_handle.seed is not None:
                        child_handle.seed = child_handle.seed + child_idx + 1
                        child_handle.env.seed = child_handle.seed

                    child_ids.append(child_id)

                    # Track parent relationship in snapshot metadata.
                    # NOTE(review): this stores the parent *environment* id in
                    # a field named parent_snapshot_id -- confirm intended.
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = env_id

                env_branches[env_id] = child_ids

        # Branch policies
        if request.policy_ids:
            from .policy_routes import (
                PolicyRestoreRequest,
                PolicySnapshotRequest,
                restore_policy,
                snapshot_policy,
            )

            for policy_id in request.policy_ids:
                policy_handle = registry.get_policy(policy_id)
                if not policy_handle:
                    logger.warning(f"Policy {policy_id} not found, skipping")
                    continue

                child_ids = []

                for child_idx in range(request.num_children):
                    # Create snapshot of parent
                    snapshot_response = await snapshot_policy(
                        PolicySnapshotRequest(policy_id=policy_id)
                    )

                    # Restore to new policy
                    restore_response = await restore_policy(
                        PolicyRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )

                    child_id = restore_response.policy_id
                    child_ids.append(child_id)

                    # Copy bound environment if parent had one
                    child_handle = registry.get_policy(child_id)
                    if child_handle and policy_handle.bound_env_id:
                        if policy_handle.bound_env_id in env_branches:
                            # The bound env was branched in this request too:
                            # pair the i-th policy child with the i-th env child.
                            child_envs = env_branches[policy_handle.bound_env_id]
                            if child_idx < len(child_envs):
                                child_handle.bound_env_id = child_envs[child_idx]
                        else:
                            # Otherwise keep same env binding
                            child_handle.bound_env_id = policy_handle.bound_env_id

                    # Track parent relationship.
                    # NOTE(review): stores the parent *policy* id in
                    # parent_snapshot_id -- confirm intended.
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = policy_id

                policy_branches[policy_id] = child_ids

        return BranchResponse(
            env_branches=env_branches,
            policy_branches=policy_branches,
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause any
        # status raised by the snapshot/restore handlers would be rewritten
        # as a generic 500 by the handler below.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Failed to create branches: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|