synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
- examples/multi_step/crafter_rl_lora.md +29 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth_ai/__init__.py +1 -0
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -1,60 +1,156 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
"""Task App configuration for the GRPO Crafter example."""
|
|
4
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
5
6
|
import os
|
|
6
7
|
import sys
|
|
8
|
+
from collections.abc import Iterable, Sequence
|
|
7
9
|
from dataclasses import dataclass
|
|
8
10
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
11
|
+
from typing import Any
|
|
10
12
|
|
|
11
|
-
from synth_ai.task.
|
|
13
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
14
|
+
from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
|
|
12
15
|
from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
|
|
16
|
+
from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
|
|
13
17
|
from synth_ai.task.rubrics import load_rubric
|
|
14
18
|
from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
|
|
15
|
-
from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
|
|
16
|
-
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
17
19
|
from synth_ai.task.tracing_utils import (
|
|
18
20
|
build_tracer_factory,
|
|
19
21
|
resolve_sft_output_dir,
|
|
20
22
|
resolve_tracing_db_url,
|
|
21
23
|
tracing_env_enabled,
|
|
22
24
|
)
|
|
23
|
-
|
|
24
25
|
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
25
26
|
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
DEFAULT_ALIAS_OPS: list[str] = ["agent", "env"] * 10
|
|
30
|
+
DEFAULT_ALIAS_STEP_REWARDS: dict[str, Any] = {
|
|
31
|
+
"enabled": True,
|
|
32
|
+
"mode": "decision_stepwise",
|
|
33
|
+
"indicator_lambda": 1.0,
|
|
34
|
+
"step_beta": 0.0,
|
|
35
|
+
}
|
|
26
36
|
|
|
27
|
-
|
|
28
|
-
TASK_APP_ROOT = REPO_ROOT / "examples" / "warming_up_to_rl" / "task_app"
|
|
29
|
-
SYNTH_ENVS_HOSTED_ROOT = TASK_APP_ROOT / "synth_envs_hosted"
|
|
37
|
+
_HERE = Path(__file__).resolve()
|
|
30
38
|
|
|
31
|
-
for path in [REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT]:
|
|
32
|
-
path_str = str(path)
|
|
33
|
-
if path_str not in sys.path:
|
|
34
|
-
sys.path.insert(0, path_str)
|
|
35
39
|
|
|
40
|
+
def _resolve_repo_root() -> Path:
|
|
41
|
+
"""Best-effort detection of the Synth AI repo root across local and Modal mounts."""
|
|
42
|
+
|
|
43
|
+
candidates: list[Path] = []
|
|
44
|
+
env_root = os.getenv("SYNTH_AI_REPO_ROOT")
|
|
45
|
+
if env_root:
|
|
46
|
+
candidates.append(Path(env_root).expanduser())
|
|
47
|
+
candidates.append(Path("/opt/synth_ai_repo"))
|
|
48
|
+
candidates.extend(parent for parent in [_HERE.parent, *_HERE.parents])
|
|
49
|
+
|
|
50
|
+
for candidate in candidates:
|
|
51
|
+
try:
|
|
52
|
+
resolved = candidate.resolve()
|
|
53
|
+
except Exception:
|
|
54
|
+
continue
|
|
55
|
+
if not resolved.exists():
|
|
56
|
+
continue
|
|
57
|
+
if (resolved / "pyproject.toml").exists() or (resolved / "uv.lock").exists():
|
|
58
|
+
return resolved
|
|
59
|
+
if (resolved / "synth_ai").is_dir():
|
|
60
|
+
return resolved
|
|
61
|
+
|
|
62
|
+
try:
|
|
63
|
+
return _HERE.parents[3]
|
|
64
|
+
except IndexError:
|
|
65
|
+
return _HERE.parent
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _resolve_task_app_root(repo_root: Path) -> Path:
|
|
69
|
+
"""Locate the task_app directory even when the module is copied to a temp mount."""
|
|
70
|
+
|
|
71
|
+
preferred = (repo_root / "examples" / "warming_up_to_rl" / "task_app").resolve()
|
|
72
|
+
if preferred.is_dir():
|
|
73
|
+
return preferred
|
|
74
|
+
|
|
75
|
+
local_parent = _HERE.parent.resolve()
|
|
76
|
+
if (local_parent / "synth_envs_hosted").is_dir():
|
|
77
|
+
return local_parent
|
|
78
|
+
|
|
79
|
+
for parent in _HERE.parents:
|
|
80
|
+
candidate = parent.resolve()
|
|
81
|
+
if (candidate / "synth_envs_hosted").is_dir():
|
|
82
|
+
return candidate
|
|
83
|
+
|
|
84
|
+
fallback = Path("/opt/synth_ai_repo/examples/warming_up_to_rl/task_app")
|
|
85
|
+
if fallback.is_dir():
|
|
86
|
+
return fallback.resolve()
|
|
87
|
+
|
|
88
|
+
return local_parent
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
REPO_ROOT = _resolve_repo_root()
|
|
92
|
+
TASK_APP_ROOT = _resolve_task_app_root(REPO_ROOT)
|
|
93
|
+
SYNTH_ENVS_HOSTED_ROOT = (TASK_APP_ROOT / "synth_envs_hosted").resolve()
|
|
94
|
+
|
|
95
|
+
EXAMPLES_ROOT = (REPO_ROOT / "examples").resolve()
|
|
96
|
+
|
|
97
|
+
for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
|
|
98
|
+
try:
|
|
99
|
+
resolved = path.resolve()
|
|
100
|
+
except Exception:
|
|
101
|
+
resolved = path
|
|
102
|
+
if resolved.exists():
|
|
103
|
+
path_str = str(resolved)
|
|
104
|
+
if path_str not in sys.path:
|
|
105
|
+
sys.path.insert(0, path_str)
|
|
106
|
+
|
|
107
|
+
# Fallback: explicitly add Modal mount path for 'examples' if REPO_ROOT detection fails
|
|
108
|
+
try:
|
|
109
|
+
_hard_examples = Path("/opt/synth_ai_repo/examples")
|
|
110
|
+
if _hard_examples.exists():
|
|
111
|
+
_hard_examples_str = str(_hard_examples.resolve())
|
|
112
|
+
if _hard_examples_str not in sys.path:
|
|
113
|
+
sys.path.insert(0, _hard_examples_str)
|
|
114
|
+
except Exception:
|
|
115
|
+
pass
|
|
116
|
+
|
|
117
|
+
HAS_HOSTED = True
|
|
36
118
|
try:
|
|
37
119
|
import crafter # type: ignore
|
|
38
|
-
import crafter.constants as
|
|
39
|
-
from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
|
|
40
|
-
from synth_envs_hosted.branching import router as branching_router
|
|
41
|
-
from synth_envs_hosted.environment_routes import router as environment_router
|
|
42
|
-
from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp
|
|
43
|
-
from synth_envs_hosted.policy_routes import router as policy_router
|
|
44
|
-
from synth_envs_hosted.rollout import (
|
|
120
|
+
import crafter.constants as crafter_constants # type: ignore
|
|
121
|
+
from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
|
|
122
|
+
from synth_envs_hosted.branching import router as branching_router # type: ignore
|
|
123
|
+
from synth_envs_hosted.environment_routes import router as environment_router # type: ignore
|
|
124
|
+
from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp # type: ignore
|
|
125
|
+
from synth_envs_hosted.policy_routes import router as policy_router # type: ignore
|
|
126
|
+
from synth_envs_hosted.rollout import ( # type: ignore
|
|
45
127
|
RolloutEnvSpec as LegacyRolloutEnvSpec,
|
|
128
|
+
)
|
|
129
|
+
from synth_envs_hosted.rollout import (
|
|
46
130
|
RolloutPolicySpec as LegacyRolloutPolicySpec,
|
|
131
|
+
)
|
|
132
|
+
from synth_envs_hosted.rollout import (
|
|
47
133
|
RolloutRecordConfig as LegacyRolloutRecordConfig,
|
|
134
|
+
)
|
|
135
|
+
from synth_envs_hosted.rollout import (
|
|
48
136
|
RolloutRequest as LegacyRolloutRequest,
|
|
137
|
+
)
|
|
138
|
+
from synth_envs_hosted.rollout import (
|
|
49
139
|
RolloutResponse as LegacyRolloutResponse,
|
|
140
|
+
)
|
|
141
|
+
from synth_envs_hosted.rollout import (
|
|
50
142
|
RolloutSafetyConfig as LegacyRolloutSafetyConfig,
|
|
143
|
+
)
|
|
144
|
+
from synth_envs_hosted.rollout import (
|
|
51
145
|
execute_rollout as legacy_execute_rollout,
|
|
52
146
|
)
|
|
53
147
|
except Exception as exc: # pragma: no cover - import-time validation
|
|
54
148
|
# Provide a more actionable error with the missing module and fix hints
|
|
55
149
|
missing_mod = None
|
|
56
150
|
if isinstance(exc, ModuleNotFoundError):
|
|
57
|
-
missing_mod =
|
|
151
|
+
missing_mod = (
|
|
152
|
+
getattr(exc, "name", None) or str(exc).split("'")[1] if "'" in str(exc) else None
|
|
153
|
+
)
|
|
58
154
|
fix_hint = None
|
|
59
155
|
if missing_mod:
|
|
60
156
|
mapping = {
|
|
@@ -74,12 +170,16 @@ except Exception as exc: # pragma: no cover - import-time validation
|
|
|
74
170
|
f"For Modal: add '{pkg}' to ModalDeploymentConfig.pip_packages in synth_ai/task/apps/grpo_crafter.py.\n"
|
|
75
171
|
f"Locally: pip install {pkg}"
|
|
76
172
|
)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
173
|
+
# Allow running without synth_envs_hosted; gate hosted features off
|
|
174
|
+
if missing_mod == "synth_envs_hosted":
|
|
175
|
+
HAS_HOSTED = False
|
|
176
|
+
else:
|
|
177
|
+
detailed = (
|
|
178
|
+
"grpo_crafter task app requires example dependencies and runtime libs.\n"
|
|
179
|
+
+ (fix_hint + "\n" if fix_hint else "")
|
|
180
|
+
+ f"Original error: {exc}"
|
|
181
|
+
)
|
|
182
|
+
raise RuntimeError(detailed) from exc
|
|
83
183
|
|
|
84
184
|
|
|
85
185
|
CRAFTING_RULES_SYSTEM_HINT = (
|
|
@@ -114,23 +214,23 @@ class CrafterDataset:
|
|
|
114
214
|
area_env = env_value("CRAFTER_AREA", "64,64")
|
|
115
215
|
self.area = tuple(int(x) for x in str(area_env).split(","))
|
|
116
216
|
self.length = int(env_value("CRAFTER_EPISODE_LENGTH", 10000))
|
|
117
|
-
self._cache:
|
|
217
|
+
self._cache: dict[int, dict[str, Any]] = {}
|
|
118
218
|
|
|
119
|
-
def config_for_seed(self, seed: int) ->
|
|
219
|
+
def config_for_seed(self, seed: int) -> dict[str, Any]:
|
|
120
220
|
return {
|
|
121
221
|
"seed": int(seed),
|
|
122
222
|
"area": list(self.area),
|
|
123
223
|
"length": self.length,
|
|
124
224
|
}
|
|
125
225
|
|
|
126
|
-
def describe_seed(self, seed: int) ->
|
|
226
|
+
def describe_seed(self, seed: int) -> dict[str, Any]:
|
|
127
227
|
seed = int(seed)
|
|
128
228
|
if seed in self._cache:
|
|
129
229
|
return self._cache[seed]
|
|
130
230
|
env = crafter.Env(area=self.area, length=self.length, seed=seed)
|
|
131
231
|
try:
|
|
132
232
|
env.reset()
|
|
133
|
-
traits =
|
|
233
|
+
traits = _compute_world_traits(env)
|
|
134
234
|
player = getattr(env, "_player", None)
|
|
135
235
|
inventory = dict(getattr(player, "inventory", {})) if player else {}
|
|
136
236
|
position = getattr(player, "pos", None)
|
|
@@ -149,23 +249,49 @@ class CrafterDataset:
|
|
|
149
249
|
self._cache[seed] = summary
|
|
150
250
|
return summary
|
|
151
251
|
|
|
152
|
-
def _difficulty(self, traits:
|
|
252
|
+
def _difficulty(self, traits: dict[str, int]) -> str:
|
|
153
253
|
for difficulty, bounds in TRAIT_BOUNDS.items():
|
|
154
|
-
if (
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
):
|
|
254
|
+
if traits.get("trees", 0) >= bounds.get("min_trees", 0) and traits.get(
|
|
255
|
+
"hostiles", 0
|
|
256
|
+
) <= bounds.get("max_hostiles", 0):
|
|
158
257
|
return difficulty
|
|
159
258
|
return "custom"
|
|
160
259
|
|
|
161
260
|
@property
|
|
162
|
-
def seed_range(self) ->
|
|
261
|
+
def seed_range(self) -> list[int]:
|
|
163
262
|
return [self.seed_min, self.seed_max]
|
|
164
263
|
|
|
165
264
|
|
|
166
|
-
def
|
|
167
|
-
import
|
|
265
|
+
def _compute_world_traits(env: crafter.Env, radius: int = 10) -> dict[str, int]:
|
|
266
|
+
# Local copy to avoid import-time issues; mirrors synth_ai.environments.examples.crafter_classic.taskset.world_traits
|
|
267
|
+
import numpy as _np # type: ignore
|
|
268
|
+
from crafter import objects as _objects # type: ignore
|
|
269
|
+
|
|
270
|
+
player = getattr(env, "_player", None)
|
|
271
|
+
if player is None:
|
|
272
|
+
return {"trees": 0, "cows": 0, "hostiles": 0}
|
|
273
|
+
pos = _np.array(getattr(player, "pos", [0, 0]))
|
|
274
|
+
counts = {"trees": 0, "cows": 0, "hostiles": 0}
|
|
275
|
+
world = getattr(env, "_world", None)
|
|
276
|
+
objects = getattr(world, "_objects", []) if world is not None else []
|
|
277
|
+
for obj in objects:
|
|
278
|
+
if obj is None or obj is player:
|
|
279
|
+
continue
|
|
280
|
+
try:
|
|
281
|
+
if _np.abs(obj.pos - pos).sum() > radius:
|
|
282
|
+
continue
|
|
283
|
+
except Exception:
|
|
284
|
+
continue
|
|
285
|
+
if isinstance(obj, _objects.Plant) and getattr(obj, "kind", "") == "tree":
|
|
286
|
+
counts["trees"] += 1
|
|
287
|
+
elif isinstance(obj, _objects.Cow):
|
|
288
|
+
counts["cows"] += 1
|
|
289
|
+
elif isinstance(obj, _objects.Zombie | _objects.Skeleton):
|
|
290
|
+
counts["hostiles"] += 1
|
|
291
|
+
return counts
|
|
292
|
+
|
|
168
293
|
|
|
294
|
+
def env_value(key: str, default: Any) -> Any:
|
|
169
295
|
return os.getenv(key, default)
|
|
170
296
|
|
|
171
297
|
|
|
@@ -182,8 +308,8 @@ def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
|
|
|
182
308
|
environments=["crafter"],
|
|
183
309
|
action_space={
|
|
184
310
|
"type": "discrete",
|
|
185
|
-
"size": len(
|
|
186
|
-
"actions": list(
|
|
311
|
+
"size": len(crafter_constants.actions),
|
|
312
|
+
"actions": list(crafter_constants.actions),
|
|
187
313
|
},
|
|
188
314
|
observation={
|
|
189
315
|
"summary": "RGB frame plus inventory, achievements, and semantic map patches.",
|
|
@@ -254,7 +380,7 @@ EVENTS_RUBRIC = load_rubric(
|
|
|
254
380
|
)
|
|
255
381
|
|
|
256
382
|
|
|
257
|
-
def describe_taskset(dataset: CrafterDataset) ->
|
|
383
|
+
def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:
|
|
258
384
|
return {
|
|
259
385
|
**DATASET_SPEC.model_dump(),
|
|
260
386
|
"seed_range": dataset.seed_range,
|
|
@@ -266,7 +392,9 @@ def describe_taskset(dataset: CrafterDataset) -> Dict[str, Any]:
|
|
|
266
392
|
}
|
|
267
393
|
|
|
268
394
|
|
|
269
|
-
def provide_task_instances(
|
|
395
|
+
def provide_task_instances(
|
|
396
|
+
dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
|
|
397
|
+
) -> Iterable[TaskInfo]:
|
|
270
398
|
infos: list[TaskInfo] = []
|
|
271
399
|
for seed_value in seeds:
|
|
272
400
|
summary = dataset.describe_seed(seed_value)
|
|
@@ -314,20 +442,144 @@ def _normalise_op(op_value: Any, index: int) -> str:
|
|
|
314
442
|
raise ValueError(f"Unsupported op type '{candidate}' at index {index}")
|
|
315
443
|
|
|
316
444
|
|
|
445
|
+
def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
    """Rewrite a legacy "math" rollout request so that it targets crafter.

    An env/policy name or id counts as a legacy alias when, after stripping
    whitespace and lower-casing, it starts with "math". Aliased names become
    "crafter" / "crafter-react" and aliased ids are cleared to ``None``. When
    at least one alias was found, missing env/policy config entries are filled
    with defaults and the op list is replaced by ``DEFAULT_ALIAS_OPS`` if it is
    empty or shorter than that default list.

    Returns the very same ``request`` object when no alias is present,
    otherwise a copy carrying the remapped env, policy and ops.
    """

    def _needs_crafter(name: str | None) -> bool:
        # Falsy values (None, "") are never treated as aliases.
        return bool(name) and str(name).strip().lower().startswith("math")

    env_spec = request.env
    policy_spec = request.policy

    env_updates: dict[str, Any] = {}
    if _needs_crafter(env_spec.env_name):
        env_updates["env_name"] = "crafter"
    if _needs_crafter(env_spec.env_id):
        env_updates["env_id"] = None

    policy_updates: dict[str, Any] = {}
    if _needs_crafter(policy_spec.policy_name):
        policy_updates["policy_name"] = "crafter-react"
    if _needs_crafter(policy_spec.policy_id):
        policy_updates["policy_id"] = None

    # Nothing matched the legacy naming: hand the request back untouched.
    if not (env_updates or policy_updates):
        return request

    # Caller-supplied config entries always win; defaults only fill the gaps.
    env_cfg = dict(env_spec.config or {})
    for cfg_key, cfg_default in (
        ("difficulty", "normal"),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
        ("env_params", {"max_steps_per_episode": 200}),
    ):
        env_cfg.setdefault(cfg_key, cfg_default)
    new_env = env_spec.model_copy(update={**env_updates, "config": env_cfg})

    policy_cfg = dict(policy_spec.config or {})
    for cfg_key, cfg_default in (
        ("max_llm_calls", 10),
        ("max_completion_tokens", 1024),
        ("temperature", 0.2),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
    ):
        policy_cfg.setdefault(cfg_key, cfg_default)
    new_policy = policy_spec.model_copy(update={**policy_updates, "config": policy_cfg})

    # An empty or too-short op list is swapped for the default alias ops.
    requested_ops = request.ops
    too_few_ops = not requested_ops or len(requested_ops) < len(DEFAULT_ALIAS_OPS)
    final_ops = list(DEFAULT_ALIAS_OPS) if too_few_ops else requested_ops

    coerced = request.model_copy(
        update={"env": new_env, "policy": new_policy, "ops": final_ops}
    )

    # Reporting is best-effort: a failure to print or log must never break the rollout.
    try:
        print(
            "[rollout] remapped math request -> crafter "
            f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
            f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
            flush=True,
        )
    except Exception:
        pass
    try:
        logger.info(
            "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
            request.env.env_name,
            coerced.env.env_name,
            request.policy.policy_name,
            coerced.policy.policy_name,
        )
    except Exception:
        pass

    return coerced
|
|
519
|
+
|
|
520
|
+
|
|
317
521
|
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
|
|
318
|
-
|
|
522
|
+
# If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
|
|
523
|
+
if not HAS_HOSTED:
|
|
524
|
+
return RolloutResponse(
|
|
525
|
+
run_id=request.run_id,
|
|
526
|
+
trajectories=[],
|
|
527
|
+
branches={},
|
|
528
|
+
metrics=RolloutMetrics(
|
|
529
|
+
episode_returns=[],
|
|
530
|
+
mean_return=0.0,
|
|
531
|
+
num_steps=0,
|
|
532
|
+
num_episodes=0,
|
|
533
|
+
details={},
|
|
534
|
+
),
|
|
535
|
+
aborted=False,
|
|
536
|
+
ops_executed=0,
|
|
537
|
+
trace=None,
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
request = _coerce_math_to_crafter(request)
|
|
541
|
+
|
|
542
|
+
policy_cfg = dict(request.policy.config or {})
|
|
543
|
+
try:
|
|
544
|
+
max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
|
|
545
|
+
except Exception:
|
|
546
|
+
max_llm_calls = 10
|
|
547
|
+
policy_cfg.setdefault("max_llm_calls", max_llm_calls)
|
|
548
|
+
policy_cfg.setdefault("max_tokens", 512)
|
|
549
|
+
policy_cfg.setdefault("max_completion_tokens", 512)
|
|
550
|
+
policy_cfg.setdefault("temperature", 0.2)
|
|
551
|
+
policy_cfg.setdefault("top_p", 0.95)
|
|
552
|
+
|
|
553
|
+
env_cfg = dict(request.env.config or {})
|
|
554
|
+
env_params = dict(env_cfg.get("env_params") or {})
|
|
555
|
+
try:
|
|
556
|
+
max_steps_episode = int(env_params.get("max_steps_per_episode") or max_llm_calls)
|
|
557
|
+
except Exception:
|
|
558
|
+
max_steps_episode = max_llm_calls
|
|
559
|
+
desired_steps = max(max_llm_calls, max_steps_episode)
|
|
560
|
+
env_params["max_steps_per_episode"] = int(desired_steps)
|
|
561
|
+
env_cfg["env_params"] = env_params
|
|
562
|
+
|
|
563
|
+
updated_policy = request.policy.model_copy(update={"config": policy_cfg})
|
|
564
|
+
updated_env = request.env.model_copy(update={"config": env_cfg})
|
|
565
|
+
request = request.model_copy(update={"policy": updated_policy, "env": updated_env})
|
|
566
|
+
|
|
567
|
+
converted_ops: list[str] = [_normalise_op(op, idx) for idx, op in enumerate(request.ops)]
|
|
568
|
+
max_ops_allowed = max_llm_calls * 2 if max_llm_calls > 0 else len(converted_ops)
|
|
569
|
+
if max_ops_allowed and len(converted_ops) > max_ops_allowed:
|
|
570
|
+
converted_ops = converted_ops[:max_ops_allowed]
|
|
319
571
|
legacy_request = LegacyRolloutRequest(
|
|
320
572
|
run_id=request.run_id,
|
|
321
573
|
env=LegacyRolloutEnvSpec(
|
|
322
574
|
env_id=request.env.env_id,
|
|
323
575
|
env_name=request.env.env_name,
|
|
324
|
-
config=
|
|
576
|
+
config=env_cfg,
|
|
325
577
|
seed=request.env.seed,
|
|
326
578
|
),
|
|
327
579
|
policy=LegacyRolloutPolicySpec(
|
|
328
580
|
policy_id=request.policy.policy_id,
|
|
329
581
|
policy_name=request.policy.policy_name,
|
|
330
|
-
config=
|
|
582
|
+
config=policy_cfg,
|
|
331
583
|
),
|
|
332
584
|
ops=converted_ops,
|
|
333
585
|
record=LegacyRolloutRecordConfig(**request.record.model_dump()),
|
|
@@ -338,7 +590,9 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
338
590
|
synth_base_url=request.synth_base_url,
|
|
339
591
|
)
|
|
340
592
|
|
|
341
|
-
legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
|
|
593
|
+
legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
|
|
594
|
+
legacy_request, fastapi_request
|
|
595
|
+
)
|
|
342
596
|
data = legacy_response.model_dump()
|
|
343
597
|
metrics = data.get("metrics", {}) or {}
|
|
344
598
|
metrics.setdefault("outcome_score", None)
|
|
@@ -352,14 +606,16 @@ def build_config() -> TaskAppConfig:
|
|
|
352
606
|
registry, dataset = build_dataset()
|
|
353
607
|
base_info = _base_task_info(dataset)
|
|
354
608
|
|
|
355
|
-
hosted_task_app = HostedTaskApp()
|
|
609
|
+
hosted_task_app = HostedTaskApp() if HAS_HOSTED else None
|
|
356
610
|
|
|
357
611
|
tracing_enabled = tracing_env_enabled()
|
|
358
612
|
tracing_db_url = resolve_tracing_db_url()
|
|
359
|
-
tracer_factory = build_tracer_factory(
|
|
613
|
+
tracer_factory = build_tracer_factory(
|
|
614
|
+
SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
|
|
615
|
+
)
|
|
360
616
|
sft_output_dir = resolve_sft_output_dir()
|
|
361
617
|
|
|
362
|
-
app_state:
|
|
618
|
+
app_state: dict[str, Any] = {
|
|
363
619
|
"task_app": hosted_task_app,
|
|
364
620
|
"allowed_environments": ["crafter"],
|
|
365
621
|
"tracing_enabled": tracing_enabled,
|
|
@@ -377,12 +633,14 @@ def build_config() -> TaskAppConfig:
|
|
|
377
633
|
if sft_output_dir:
|
|
378
634
|
print(f"[task:sft] writing JSONL to {sft_output_dir}", flush=True)
|
|
379
635
|
|
|
380
|
-
def _describe_taskset() ->
|
|
636
|
+
def _describe_taskset() -> dict[str, Any]:
|
|
381
637
|
return describe_taskset(dataset)
|
|
382
638
|
|
|
383
639
|
def _provide_instances(seeds: Sequence[int]):
|
|
384
640
|
return provide_task_instances(dataset, base_info, seeds)
|
|
385
641
|
|
|
642
|
+
routers: tuple = (environment_router, policy_router, branching_router) if HAS_HOSTED else ()
|
|
643
|
+
|
|
386
644
|
config = TaskAppConfig(
|
|
387
645
|
app_id="grpo-crafter",
|
|
388
646
|
name="GRPO Crafter Task App",
|
|
@@ -393,8 +651,10 @@ def build_config() -> TaskAppConfig:
|
|
|
393
651
|
rollout=rollout_executor,
|
|
394
652
|
dataset_registry=registry,
|
|
395
653
|
rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
|
|
396
|
-
proxy=ProxyConfig(
|
|
397
|
-
|
|
654
|
+
proxy=ProxyConfig(
|
|
655
|
+
enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT
|
|
656
|
+
),
|
|
657
|
+
routers=routers,
|
|
398
658
|
app_state=app_state,
|
|
399
659
|
cors_origins=["*"],
|
|
400
660
|
)
|
|
@@ -426,10 +686,12 @@ register_task_app(
|
|
|
426
686
|
"crafter",
|
|
427
687
|
),
|
|
428
688
|
extra_local_dirs=(
|
|
429
|
-
|
|
430
|
-
(str(
|
|
689
|
+
# Mount repo root so local modules resolve when deployed on Modal
|
|
690
|
+
(str(REPO_ROOT), "/opt/synth_ai_repo"),
|
|
691
|
+
(str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
|
|
692
|
+
(str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
|
|
431
693
|
),
|
|
432
|
-
secret_names=("
|
|
694
|
+
secret_names=("groq-api-key", "openai-api-key"),
|
|
433
695
|
memory=16384,
|
|
434
696
|
cpu=4.0,
|
|
435
697
|
max_containers=10,
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
|
|
2
1
|
"""Compatibility wrapper for the GRPO Crafter task app.
|
|
3
2
|
|
|
4
|
-
This module now delegates to the
|
|
5
|
-
`
|
|
6
|
-
file directly or targeting `fastapi_app` from external tooling). Prefer using
|
|
3
|
+
This module now delegates to the TaskAppConfig defined in the colocated example at
|
|
4
|
+
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
|
+
(running the file directly or targeting `fastapi_app` from external tooling). Prefer using
|
|
7
6
|
`uvx synth-ai serve grpo-crafter` for local development and testing.
|
|
8
7
|
"""
|
|
9
8
|
|
|
@@ -15,37 +14,19 @@ from pathlib import Path
|
|
|
15
14
|
from fastapi.exceptions import RequestValidationError
|
|
16
15
|
from fastapi.responses import JSONResponse
|
|
17
16
|
from starlette.requests import Request
|
|
18
|
-
|
|
19
17
|
from synth_ai.task.apps import ModalDeploymentConfig, registry
|
|
20
|
-
from .grpo_crafter import build_config
|
|
21
18
|
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
22
19
|
from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
23
20
|
|
|
21
|
+
from .grpo_crafter import build_config
|
|
24
22
|
|
|
25
23
|
APP_ID = "grpo-crafter"
|
|
26
24
|
|
|
27
25
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
description=_BASE_CONFIG.description,
|
|
33
|
-
base_task_info=_BASE_CONFIG.base_task_info,
|
|
34
|
-
describe_taskset=_BASE_CONFIG.describe_taskset,
|
|
35
|
-
provide_task_instances=_BASE_CONFIG.provide_task_instances,
|
|
36
|
-
rollout=_BASE_CONFIG.rollout,
|
|
37
|
-
dataset_registry=_BASE_CONFIG.dataset_registry,
|
|
38
|
-
rubrics=_BASE_CONFIG.rubrics,
|
|
39
|
-
proxy=_BASE_CONFIG.proxy,
|
|
40
|
-
routers=_BASE_CONFIG.routers,
|
|
41
|
-
middleware=_BASE_CONFIG.middleware,
|
|
42
|
-
app_state=_BASE_CONFIG.app_state,
|
|
43
|
-
require_api_key=_BASE_CONFIG.require_api_key,
|
|
44
|
-
expose_debug_env=_BASE_CONFIG.expose_debug_env,
|
|
45
|
-
cors_origins=_BASE_CONFIG.cors_origins,
|
|
46
|
-
startup_hooks=_BASE_CONFIG.startup_hooks,
|
|
47
|
-
shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
|
|
48
|
-
)
|
|
26
|
+
def _build_base_config() -> TaskAppConfig:
    """Build the base TaskAppConfig on demand by calling ``build_config``.

    Construction happens at call time rather than at module import so that
    importing this wrapper does not trigger the heavy setup work.
    """
    base_config = build_config()
    return base_config
|
|
29
|
+
|
|
49
30
|
|
|
50
31
|
try:
|
|
51
32
|
_REGISTERED_ENTRY = registry.get(APP_ID)
|
|
@@ -59,8 +40,8 @@ else:
|
|
|
59
40
|
|
|
60
41
|
def build_task_app_config() -> TaskAppConfig:
    """Create a new TaskAppConfig for this wrapper.

    A base config is built on each call and its ``clone()`` is returned.
    """
    return _build_base_config().clone()
|
|
64
45
|
|
|
65
46
|
|
|
66
47
|
def fastapi_app():
|
|
@@ -122,7 +103,7 @@ def fastapi_app():
|
|
|
122
103
|
try:
|
|
123
104
|
hdr = request.headers
|
|
124
105
|
snapshot = {
|
|
125
|
-
"path": str(
|
|
106
|
+
"path": str(request.url.path),
|
|
126
107
|
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
127
108
|
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
128
109
|
"have_authorization": bool(hdr.get("authorization")),
|