synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,21 +1,22 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
"""Task app configuration for a single-step math reasoning environment."""
|
|
4
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
5
|
import contextlib
|
|
6
6
|
import os
|
|
7
7
|
import random
|
|
8
8
|
import re
|
|
9
9
|
import uuid
|
|
10
|
+
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
|
|
10
11
|
from dataclasses import dataclass
|
|
11
12
|
from pathlib import Path
|
|
12
|
-
from typing import Any,
|
|
13
|
+
from typing import Any, cast
|
|
13
14
|
|
|
14
15
|
import httpx
|
|
15
16
|
from datasets import load_dataset
|
|
16
17
|
from fastapi import APIRouter, HTTPException, Request
|
|
17
18
|
from pydantic import BaseModel, Field
|
|
18
|
-
|
|
19
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
19
20
|
from synth_ai.task.contracts import (
|
|
20
21
|
RolloutMetrics,
|
|
21
22
|
RolloutRequest,
|
|
@@ -25,9 +26,9 @@ from synth_ai.task.contracts import (
|
|
|
25
26
|
TaskInfo,
|
|
26
27
|
)
|
|
27
28
|
from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
|
|
29
|
+
from synth_ai.task.errors import http_exception
|
|
28
30
|
from synth_ai.task.rubrics import Rubric, load_rubric
|
|
29
31
|
from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
|
|
30
|
-
from synth_ai.task.errors import http_exception
|
|
31
32
|
from synth_ai.task.tracing_utils import (
|
|
32
33
|
build_tracer_factory,
|
|
33
34
|
resolve_sft_output_dir,
|
|
@@ -35,13 +36,14 @@ from synth_ai.task.tracing_utils import (
|
|
|
35
36
|
tracing_env_enabled,
|
|
36
37
|
)
|
|
37
38
|
from synth_ai.task.vendors import normalize_vendor_keys
|
|
38
|
-
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
39
39
|
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
40
40
|
|
|
41
41
|
REPO_ROOT = Path(__file__).resolve().parents[3]
|
|
42
42
|
|
|
43
|
-
_modal_volume_candidate = Path(
|
|
44
|
-
|
|
43
|
+
_modal_volume_candidate = Path(
|
|
44
|
+
os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")
|
|
45
|
+
).expanduser()
|
|
46
|
+
_modal_volume_root: Path | None = None
|
|
45
47
|
try:
|
|
46
48
|
_modal_volume_candidate.mkdir(parents=True, exist_ok=True)
|
|
47
49
|
_modal_volume_root = _modal_volume_candidate
|
|
@@ -55,7 +57,9 @@ if _modal_volume_root is not None:
|
|
|
55
57
|
local_dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
56
58
|
os.environ.setdefault("MATH_DATASET_LOCAL_DIR", str(local_dataset_dir))
|
|
57
59
|
else:
|
|
58
|
-
hf_cache_path = Path(
|
|
60
|
+
hf_cache_path = Path(
|
|
61
|
+
os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets"))
|
|
62
|
+
).expanduser()
|
|
59
63
|
|
|
60
64
|
hf_cache_path.mkdir(parents=True, exist_ok=True)
|
|
61
65
|
os.environ.setdefault("MATH_DATASET_CACHE_DIR", str(hf_cache_path))
|
|
@@ -101,7 +105,7 @@ MATH_DATASET_SPEC = TaskDatasetSpec(
|
|
|
101
105
|
_BOXED_MARKERS: tuple[str, ...] = ("\\boxed", "boxed")
|
|
102
106
|
|
|
103
107
|
|
|
104
|
-
def _extract_boxed(text: str) ->
|
|
108
|
+
def _extract_boxed(text: str) -> str | None:
|
|
105
109
|
if not text:
|
|
106
110
|
return None
|
|
107
111
|
for marker in _BOXED_MARKERS:
|
|
@@ -170,9 +174,9 @@ class MathDataset:
|
|
|
170
174
|
self.name = name
|
|
171
175
|
self.config = config
|
|
172
176
|
self.splits = [split for split in splits if split]
|
|
173
|
-
self._cache:
|
|
177
|
+
self._cache: dict[str, Any] = {}
|
|
174
178
|
self._local_dir = os.getenv("MATH_DATASET_LOCAL_DIR")
|
|
175
|
-
self._hf_token:
|
|
179
|
+
self._hf_token: str | None = None
|
|
176
180
|
for key in HF_TOKEN_ENV_KEYS:
|
|
177
181
|
value = os.getenv(key)
|
|
178
182
|
if value:
|
|
@@ -182,7 +186,7 @@ class MathDataset:
|
|
|
182
186
|
break
|
|
183
187
|
# No multi-candidate fallback: enforce explicit dataset id
|
|
184
188
|
|
|
185
|
-
def _local_file_for_split(self, split: str) ->
|
|
189
|
+
def _local_file_for_split(self, split: str) -> Path | None:
|
|
186
190
|
specific = os.getenv(f"MATH_DATASET_LOCAL_{split.upper()}_FILE")
|
|
187
191
|
if specific:
|
|
188
192
|
path = Path(specific).expanduser()
|
|
@@ -203,11 +207,13 @@ class MathDataset:
|
|
|
203
207
|
if split not in self._cache:
|
|
204
208
|
local_file = self._local_file_for_split(split)
|
|
205
209
|
if local_file is not None:
|
|
206
|
-
dataset = load_dataset(
|
|
210
|
+
dataset = load_dataset(
|
|
211
|
+
"json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE)
|
|
212
|
+
)
|
|
207
213
|
self._cache[split] = dataset["train"]
|
|
208
214
|
else:
|
|
209
215
|
try:
|
|
210
|
-
load_kwargs:
|
|
216
|
+
load_kwargs: dict[str, Any] = {"split": split}
|
|
211
217
|
if self.config:
|
|
212
218
|
load_kwargs["name"] = self.config
|
|
213
219
|
if self._hf_token:
|
|
@@ -221,7 +227,7 @@ class MathDataset:
|
|
|
221
227
|
tmp_path = target.with_name(target.name + ".tmp")
|
|
222
228
|
try:
|
|
223
229
|
local_dir.mkdir(parents=True, exist_ok=True)
|
|
224
|
-
|
|
230
|
+
ds.to_json(str(tmp_path))
|
|
225
231
|
tmp_path.replace(target)
|
|
226
232
|
except Exception:
|
|
227
233
|
with contextlib.suppress(FileNotFoundError):
|
|
@@ -235,7 +241,7 @@ class MathDataset:
|
|
|
235
241
|
raise RuntimeError(" ".join(hints)) from exc
|
|
236
242
|
return self._cache[split]
|
|
237
243
|
|
|
238
|
-
def sample(self, *, split: str, index:
|
|
244
|
+
def sample(self, *, split: str, index: int | None = None) -> dict[str, Any]:
|
|
239
245
|
dataset = self._load_split(split)
|
|
240
246
|
if len(dataset) == 0:
|
|
241
247
|
raise RuntimeError(f"Dataset split '{split}' is empty")
|
|
@@ -301,9 +307,7 @@ class MathDataset:
|
|
|
301
307
|
except Exception as exc:
|
|
302
308
|
errors.append(f"{split}: {exc}")
|
|
303
309
|
if errors:
|
|
304
|
-
raise RuntimeError(
|
|
305
|
-
"Dataset preparation failed:\n" + "\n".join(errors)
|
|
306
|
-
)
|
|
310
|
+
raise RuntimeError("Dataset preparation failed:\n" + "\n".join(errors))
|
|
307
311
|
|
|
308
312
|
|
|
309
313
|
@dataclass
|
|
@@ -322,9 +326,9 @@ class MathEnvironmentManager:
|
|
|
322
326
|
|
|
323
327
|
def __init__(self, dataset: MathDataset) -> None:
|
|
324
328
|
self.dataset = dataset
|
|
325
|
-
self._states:
|
|
329
|
+
self._states: dict[str, MathEnvState] = {}
|
|
326
330
|
|
|
327
|
-
def create(self, *, split: str, index:
|
|
331
|
+
def create(self, *, split: str, index: int | None, seed: int | None) -> MathEnvState:
|
|
328
332
|
if index is None and seed is not None:
|
|
329
333
|
index = seed
|
|
330
334
|
sample = self.dataset.sample(split=split, index=index)
|
|
@@ -350,11 +354,11 @@ class MathEnvironmentManager:
|
|
|
350
354
|
|
|
351
355
|
|
|
352
356
|
class InitializePayload(BaseModel):
|
|
353
|
-
seed:
|
|
354
|
-
config:
|
|
357
|
+
seed: int | None = None
|
|
358
|
+
config: dict[str, Any] = Field(default_factory=dict)
|
|
355
359
|
|
|
356
360
|
|
|
357
|
-
def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
|
|
361
|
+
def _observation_from_state(state: MathEnvState) -> dict[str, Any]:
|
|
358
362
|
return {
|
|
359
363
|
"problem": state.problem,
|
|
360
364
|
"split": state.split,
|
|
@@ -362,7 +366,9 @@ def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
|
|
|
362
366
|
}
|
|
363
367
|
|
|
364
368
|
|
|
365
|
-
def _score_submission(
|
|
369
|
+
def _score_submission(
|
|
370
|
+
state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]
|
|
371
|
+
) -> tuple[float, str, bool]:
|
|
366
372
|
if not tool_calls:
|
|
367
373
|
return REWARD_NEGATIVE_NO_TOOL, "missing_tool_call", False
|
|
368
374
|
call = tool_calls[0]
|
|
@@ -374,14 +380,61 @@ def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any
|
|
|
374
380
|
if not answer:
|
|
375
381
|
return REWARD_NEGATIVE_NO_ANSWER, "blank_answer", False
|
|
376
382
|
is_correct = answer == state.answer
|
|
377
|
-
return (
|
|
383
|
+
return (
|
|
384
|
+
(REWARD_POSITIVE if is_correct else 0.0),
|
|
385
|
+
("correct" if is_correct else "incorrect"),
|
|
386
|
+
is_correct,
|
|
387
|
+
)
|
|
378
388
|
|
|
379
389
|
|
|
380
390
|
math_router = APIRouter()
|
|
381
391
|
|
|
382
392
|
|
|
393
|
+
def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]:
|
|
394
|
+
"""Return a compact, log-friendly preview of tool calls.
|
|
395
|
+
|
|
396
|
+
Truncates long fields to avoid noisy logs and leaking excessive content.
|
|
397
|
+
"""
|
|
398
|
+
preview: list[dict[str, Any]] = []
|
|
399
|
+
for call in list(tool_calls or [])[:3]:
|
|
400
|
+
args = dict(call.get("args") or {})
|
|
401
|
+
answer = str(args.get("answer") or "")
|
|
402
|
+
# Hard truncate to keep logs compact
|
|
403
|
+
answer_short = answer[:120] + ("…" if len(answer) > 120 else "")
|
|
404
|
+
preview.append(
|
|
405
|
+
{
|
|
406
|
+
"tool": call.get("tool"),
|
|
407
|
+
"answer": answer_short,
|
|
408
|
+
}
|
|
409
|
+
)
|
|
410
|
+
return preview
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def _event_and_outcome_components(
|
|
414
|
+
tool_calls: Sequence[Mapping[str, Any]], *, correct: bool, reward: float
|
|
415
|
+
) -> dict[str, float]:
|
|
416
|
+
"""Approximate component-wise scores for RL-style logs.
|
|
417
|
+
|
|
418
|
+
- env: task-level scalar reward (our single-step outcome)
|
|
419
|
+
- rubric_event: 1.0 if a valid tool call with non-empty answer was made else 0.0
|
|
420
|
+
- rubric_outcome: 1.0 if final answer was correct else 0.0
|
|
421
|
+
"""
|
|
422
|
+
has_valid_tool = False
|
|
423
|
+
if tool_calls:
|
|
424
|
+
first = tool_calls[0] or {}
|
|
425
|
+
if str(first.get("tool") or "") == TOOL_NAME:
|
|
426
|
+
args = first.get("args") or {}
|
|
427
|
+
ans = str(args.get("answer") or "").strip()
|
|
428
|
+
has_valid_tool = bool(ans)
|
|
429
|
+
return {
|
|
430
|
+
"env": float(reward),
|
|
431
|
+
"rubric_event": 1.0 if has_valid_tool else 0.0,
|
|
432
|
+
"rubric_outcome": 1.0 if bool(correct) else 0.0,
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
|
|
383
436
|
@math_router.post("/env/math/initialize")
|
|
384
|
-
async def initialize_env(request: Request, payload: InitializePayload) -> Dict[str, Any]:
|
|
437
|
+
async def initialize_env(request: Request, payload: InitializePayload) -> dict[str, Any]:
|
|
385
438
|
manager: MathEnvironmentManager = request.app.state.math_env_manager
|
|
386
439
|
split = str(payload.config.get("split") or DEFAULT_SPLIT)
|
|
387
440
|
seed = payload.seed
|
|
@@ -397,7 +450,7 @@ async def initialize_env(request: Request, payload: InitializePayload) -> Dict[s
|
|
|
397
450
|
|
|
398
451
|
|
|
399
452
|
@math_router.post("/env/math/step")
|
|
400
|
-
async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
453
|
+
async def step_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
|
|
401
454
|
manager: MathEnvironmentManager = request.app.state.math_env_manager
|
|
402
455
|
env_id = str(payload.get("env_id") or "")
|
|
403
456
|
if not env_id:
|
|
@@ -410,6 +463,26 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
410
463
|
action = payload.get("action") or {}
|
|
411
464
|
tool_calls = action.get("tool_calls") or payload.get("tool_calls") or []
|
|
412
465
|
reward, status, correct = _score_submission(state, tool_calls)
|
|
466
|
+
with contextlib.suppress(Exception):
|
|
467
|
+
print(
|
|
468
|
+
"[MATH_STEP] env_id=",
|
|
469
|
+
state.env_id,
|
|
470
|
+
" split=",
|
|
471
|
+
state.split,
|
|
472
|
+
" index=",
|
|
473
|
+
state.index,
|
|
474
|
+
" calls=",
|
|
475
|
+
_preview_tool_calls(tool_calls),
|
|
476
|
+
" reward=",
|
|
477
|
+
reward,
|
|
478
|
+
" status=",
|
|
479
|
+
status,
|
|
480
|
+
" correct=",
|
|
481
|
+
correct,
|
|
482
|
+
" components=",
|
|
483
|
+
_event_and_outcome_components(tool_calls, correct=correct, reward=reward),
|
|
484
|
+
flush=True,
|
|
485
|
+
)
|
|
413
486
|
state.done = True
|
|
414
487
|
|
|
415
488
|
observation = _observation_from_state(state)
|
|
@@ -427,7 +500,7 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
427
500
|
|
|
428
501
|
|
|
429
502
|
@math_router.post("/env/math/terminate")
|
|
430
|
-
async def terminate_env(request: Request, payload:
|
|
503
|
+
async def terminate_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
|
|
431
504
|
manager: MathEnvironmentManager = request.app.state.math_env_manager
|
|
432
505
|
env_id = str(payload.get("env_id") or "")
|
|
433
506
|
if env_id:
|
|
@@ -448,7 +521,9 @@ def _resolve_inference_url(base_url: str) -> str:
|
|
|
448
521
|
return f"{normalized}/v1/chat/completions"
|
|
449
522
|
|
|
450
523
|
|
|
451
|
-
async def _call_inference(
|
|
524
|
+
async def _call_inference(
|
|
525
|
+
policy_config: Mapping[str, Any], observation: Mapping[str, Any]
|
|
526
|
+
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
|
|
452
527
|
inference_url = str(policy_config.get("inference_url") or "").rstrip("/")
|
|
453
528
|
if not inference_url:
|
|
454
529
|
raise RuntimeError("policy.config.inference_url required for rollout")
|
|
@@ -480,7 +555,7 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
|
|
|
480
555
|
},
|
|
481
556
|
]
|
|
482
557
|
|
|
483
|
-
payload:
|
|
558
|
+
payload: dict[str, Any] = {
|
|
484
559
|
"model": model,
|
|
485
560
|
"messages": messages,
|
|
486
561
|
"tools": [
|
|
@@ -549,7 +624,7 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
|
|
|
549
624
|
function = call.get("function") or {}
|
|
550
625
|
name = function.get("name")
|
|
551
626
|
arguments = function.get("arguments")
|
|
552
|
-
parsed_args:
|
|
627
|
+
parsed_args: dict[str, Any]
|
|
553
628
|
if isinstance(arguments, str):
|
|
554
629
|
try:
|
|
555
630
|
import json
|
|
@@ -562,6 +637,15 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
|
|
|
562
637
|
else:
|
|
563
638
|
parsed_args = {}
|
|
564
639
|
tool_calls.append({"tool": name, "args": parsed_args})
|
|
640
|
+
# Lightweight provider-side logging
|
|
641
|
+
with contextlib.suppress(Exception):
|
|
642
|
+
print(
|
|
643
|
+
"[MATH_INFER] model=",
|
|
644
|
+
model,
|
|
645
|
+
" calls=",
|
|
646
|
+
_preview_tool_calls(tool_calls),
|
|
647
|
+
flush=True,
|
|
648
|
+
)
|
|
565
649
|
return tool_calls, data
|
|
566
650
|
|
|
567
651
|
|
|
@@ -576,11 +660,13 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
576
660
|
"index": sample["index"],
|
|
577
661
|
}
|
|
578
662
|
|
|
579
|
-
tool_calls: list[
|
|
580
|
-
inference_payload:
|
|
581
|
-
error_info:
|
|
663
|
+
tool_calls: list[dict[str, Any]] = []
|
|
664
|
+
inference_payload: dict[str, Any] | None = None
|
|
665
|
+
error_info: dict[str, Any] = {}
|
|
582
666
|
try:
|
|
583
|
-
tool_calls, inference_payload = await _call_inference(
|
|
667
|
+
tool_calls, inference_payload = await _call_inference(
|
|
668
|
+
request.policy.config or {}, observation
|
|
669
|
+
)
|
|
584
670
|
except HTTPException as http_err:
|
|
585
671
|
tool_calls = []
|
|
586
672
|
error_info = {"error": http_err.detail, "code": http_err.status_code}
|
|
@@ -600,6 +686,28 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
600
686
|
tool_calls,
|
|
601
687
|
)
|
|
602
688
|
|
|
689
|
+
# Log a concise summary so we can debug reward=0 issues in production
|
|
690
|
+
with contextlib.suppress(Exception):
|
|
691
|
+
print(
|
|
692
|
+
"[MATH_ROLLOUT] run=",
|
|
693
|
+
request.run_id,
|
|
694
|
+
" split=",
|
|
695
|
+
sample["split"],
|
|
696
|
+
" index=",
|
|
697
|
+
sample["index"],
|
|
698
|
+
" calls=",
|
|
699
|
+
_preview_tool_calls(tool_calls),
|
|
700
|
+
" reward=",
|
|
701
|
+
reward,
|
|
702
|
+
" status=",
|
|
703
|
+
status,
|
|
704
|
+
" correct=",
|
|
705
|
+
correct,
|
|
706
|
+
" components=",
|
|
707
|
+
_event_and_outcome_components(tool_calls, correct=correct, reward=reward),
|
|
708
|
+
flush=True,
|
|
709
|
+
)
|
|
710
|
+
|
|
603
711
|
step = RolloutStep(
|
|
604
712
|
obs=observation,
|
|
605
713
|
tool_calls=tool_calls,
|
|
@@ -610,6 +718,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
610
718
|
"status": status,
|
|
611
719
|
"correct": correct,
|
|
612
720
|
"raw_solution": sample["raw_solution"],
|
|
721
|
+
"tool_call_preview": _preview_tool_calls(tool_calls),
|
|
613
722
|
**error_info,
|
|
614
723
|
},
|
|
615
724
|
)
|
|
@@ -634,6 +743,34 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
634
743
|
details={"status": status, "correct": correct},
|
|
635
744
|
)
|
|
636
745
|
|
|
746
|
+
# Include a minimal trace when requested or tracing is enabled via env
|
|
747
|
+
include_trace = bool(
|
|
748
|
+
(request.record and getattr(request.record, "return_trace", False))
|
|
749
|
+
or os.getenv("TASKAPP_TRACING_ENABLED")
|
|
750
|
+
)
|
|
751
|
+
trace_payload = None
|
|
752
|
+
if include_trace:
|
|
753
|
+
try:
|
|
754
|
+
# Minimal structured trace for assertions
|
|
755
|
+
trace_payload = {
|
|
756
|
+
"session_id": str(uuid.uuid4()),
|
|
757
|
+
"events_count": 1,
|
|
758
|
+
"decision_rewards": [reward],
|
|
759
|
+
"lm_calls": (
|
|
760
|
+
[{"prompt": str(observation.get("problem", "")), "response": str(tool_calls)}]
|
|
761
|
+
if tool_calls
|
|
762
|
+
else []
|
|
763
|
+
),
|
|
764
|
+
"metadata": {
|
|
765
|
+
"env": "math_single_step",
|
|
766
|
+
"split": sample["split"],
|
|
767
|
+
"index": sample["index"],
|
|
768
|
+
"status": status,
|
|
769
|
+
},
|
|
770
|
+
}
|
|
771
|
+
except Exception:
|
|
772
|
+
trace_payload = None
|
|
773
|
+
|
|
637
774
|
return RolloutResponse(
|
|
638
775
|
run_id=request.run_id,
|
|
639
776
|
trajectories=[trajectory],
|
|
@@ -641,7 +778,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
641
778
|
metrics=metrics,
|
|
642
779
|
aborted=False,
|
|
643
780
|
ops_executed=2,
|
|
644
|
-
trace=
|
|
781
|
+
trace=trace_payload,
|
|
645
782
|
)
|
|
646
783
|
|
|
647
784
|
|
|
@@ -739,7 +876,7 @@ EVENTS_RUBRIC: Rubric = cast(
|
|
|
739
876
|
)
|
|
740
877
|
|
|
741
878
|
|
|
742
|
-
def describe_taskset(dataset: MathDataset) ->
|
|
879
|
+
def describe_taskset(dataset: MathDataset) -> dict[str, Any]:
|
|
743
880
|
return {
|
|
744
881
|
**MATH_DATASET_SPEC.model_dump(),
|
|
745
882
|
"hf_dataset": DATASET_NAME,
|
|
@@ -775,10 +912,12 @@ def build_config() -> TaskAppConfig:
|
|
|
775
912
|
|
|
776
913
|
tracing_enabled = tracing_env_enabled()
|
|
777
914
|
tracing_db_url = resolve_tracing_db_url()
|
|
778
|
-
tracer_factory = build_tracer_factory(
|
|
915
|
+
tracer_factory = build_tracer_factory(
|
|
916
|
+
SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
|
|
917
|
+
)
|
|
779
918
|
sft_output_dir = resolve_sft_output_dir()
|
|
780
919
|
|
|
781
|
-
app_state:
|
|
920
|
+
app_state: dict[str, Any] = {
|
|
782
921
|
"math_dataset": dataset,
|
|
783
922
|
"math_env_manager": MathEnvironmentManager(dataset),
|
|
784
923
|
"tracing_enabled": tracing_enabled,
|
|
@@ -8,10 +8,10 @@ from pathlib import Path
|
|
|
8
8
|
from fastapi.exceptions import RequestValidationError
|
|
9
9
|
from fastapi.responses import JSONResponse
|
|
10
10
|
from starlette.requests import Request
|
|
11
|
-
|
|
11
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
12
12
|
from synth_ai.task.server import create_task_app, run_task_app
|
|
13
|
+
|
|
13
14
|
from .math_single_step import build_config
|
|
14
|
-
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def fastapi_app():
|
|
@@ -40,7 +40,10 @@ def fastapi_app():
|
|
|
40
40
|
async def health(request: Request):
|
|
41
41
|
env_key = normalize_environment_api_key()
|
|
42
42
|
if not env_key:
|
|
43
|
-
return JSONResponse(
|
|
43
|
+
return JSONResponse(
|
|
44
|
+
status_code=503,
|
|
45
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
46
|
+
)
|
|
44
47
|
if not is_api_key_header_authorized(request):
|
|
45
48
|
prefix = _log_env_key_prefix("health", env_key)
|
|
46
49
|
content = {"status": "healthy", "authorized": False}
|
|
@@ -53,7 +56,10 @@ def fastapi_app():
|
|
|
53
56
|
async def health_rollout(request: Request):
|
|
54
57
|
env_key = normalize_environment_api_key()
|
|
55
58
|
if not env_key:
|
|
56
|
-
return JSONResponse(
|
|
59
|
+
return JSONResponse(
|
|
60
|
+
status_code=503,
|
|
61
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
62
|
+
)
|
|
57
63
|
if not is_api_key_header_authorized(request):
|
|
58
64
|
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
59
65
|
content = {"status": "healthy", "authorized": False}
|
|
@@ -67,7 +73,7 @@ def fastapi_app():
|
|
|
67
73
|
try:
|
|
68
74
|
hdr = request.headers
|
|
69
75
|
snapshot = {
|
|
70
|
-
"path": str(
|
|
76
|
+
"path": str(request.url.path),
|
|
71
77
|
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
72
78
|
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
73
79
|
"have_authorization": bool(hdr.get("authorization")),
|
|
@@ -76,7 +82,9 @@ def fastapi_app():
|
|
|
76
82
|
print("[422] validation", snapshot, flush=True)
|
|
77
83
|
except Exception:
|
|
78
84
|
pass
|
|
79
|
-
return JSONResponse(
|
|
85
|
+
return JSONResponse(
|
|
86
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
87
|
+
)
|
|
80
88
|
|
|
81
89
|
return app
|
|
82
90
|
|
examples/sft/README.md
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
### Supervised Fine-Tuning for Crafter
|
|
2
|
+
|
|
3
|
+
This folder provides a minimal, reusable SFT workflow that pulls out the SFT step from `examples/warming_up_to_rl/` and focuses it on LoRA/QLoRA. We've also added guidance for running full finetuning (FFT) so you can compare adapters against end-to-end weight updates.
|
|
4
|
+
|
|
5
|
+
It supports distilling Groq (or other vendor) rollouts into JSONL using tracing and then training a small base model like `Qwen/Qwen3-0.6B`.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
### 0) Load environment from .env.dev (recommended)
|
|
10
|
+
|
|
11
|
+
Use your dev env file so keys/URLs are sourced consistently:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Example path; update to your actual dev env
|
|
15
|
+
set -a && source /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev && set +a
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
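This ensures `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and (optionally) `BACKEND_BASE_URL` are available to the steps below. The model-listing snippet in step 3 also reads `SYNTH_API_KEY`, so make sure that variable is set as well.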
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
### 1) Collect traces and export SFT JSONL
|
|
23
|
+
|
|
24
|
+
You can generate traces with the Crafter task app and then export them to SFT JSONL using the existing exporter:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Serve the task app locally with tracing enabled (example)
|
|
28
|
+
uvx synth-ai serve grpo-crafter \
|
|
29
|
+
--trace traces/v3 \
|
|
30
|
+
--trace-db traces/v3/synth_ai.db \
|
|
31
|
+
--port 8001
|
|
32
|
+
|
|
33
|
+
# Or run traced local rollouts to accumulate data
|
|
34
|
+
uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
|
|
35
|
+
--episodes 50 --max-turns 10
|
|
36
|
+
|
|
37
|
+
# Export SFT dataset from the trace DB
|
|
38
|
+
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
39
|
+
--db traces/v3/synth_ai.db \
|
|
40
|
+
--min-unique 0 \
|
|
41
|
+
--output examples/sft/ft_data/crafter_traces.jsonl
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Notes:
|
|
45
|
+
- The exporter uses achievements and event rewards to filter high-signal steps. Combine `--min-unique`, `--min-outcome-reward`, `--event-reward`, and `--require-achievement` to control data quality.
|
|
46
|
+
- You can restrict to sessions from certain providers/models with `--provider`/`--model`.
|
|
47
|
+
- Use `--limit` while debugging to reduce dataset size quickly.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
### 2a) Train LoRA (QLoRA) on Qwen/Qwen3-0.6B
|
|
52
|
+
|
|
53
|
+
Use the standard CLI. Do not use a custom Python finetuning script. Point the CLI at your `.env.dev` so it picks up keys automatically:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uvx synth-ai train \
|
|
57
|
+
--type sft \
|
|
58
|
+
--config examples/sft/configs/crafter_lora_qwen0p6b.toml \
|
|
59
|
+
--dataset examples/sft/ft_data/crafter_traces.jsonl \
|
|
60
|
+
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The config sets `training.use_qlora = true` and `hyperparameters.train_kind = "peft"` to request LoRA adapters.
|
|
64
|
+
|
|
65
|
+
Experiment tips:
|
|
66
|
+
- The backend currently defaults to a LoRA rank of 16. If you need other ranks, generate the payload with `--dry-run`, add `"lora_rank": <value>` (and optional `"lora_alpha"`, `"lora_dropout"`) under `hyperparameters`, and submit it via the API until the CLI exposes these knobs directly.
|
|
67
|
+
- Duplicate the TOML and adjust `hyperparameters.warmup_ratio`, `learning_rate`, or `gradient_accumulation_steps` to keep the global batch size comparable across datasets.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
### 2b) Train Full Finetune (FFT) on Qwen/Qwen3-0.6B
|
|
72
|
+
|
|
73
|
+
Full finetuning updates all weights and uses a near-identical CLI flow with the LoRA toggle disabled. The helper config lives alongside the LoRA sample:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
uvx synth-ai train \
|
|
77
|
+
--type sft \
|
|
78
|
+
--config examples/sft/configs/crafter_fft_qwen0p6b.toml \
|
|
79
|
+
--dataset examples/sft/ft_data/crafter_traces.jsonl \
|
|
80
|
+
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Key differences vs LoRA:
|
|
84
|
+
- `training.use_qlora = false` and `hyperparameters.train_kind = "fft"` request a full-weight update.
|
|
85
|
+
- `per_device_batch` is set to 1 in the FFT config to keep memory use comfortable on a single H100; raise it gradually as you confirm headroom.
|
|
86
|
+
- FFT runs slower per step. Consider trimming the dataset with `--examples` or the exporter filters for quick baselines.
|
|
87
|
+
|
|
88
|
+
If you want the 4B Crafter FFT baseline from the RL examples, reuse `examples/warming_up_to_rl/configs/crafter_fft_4b.toml` with the same CLI command.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
### 3) Evaluate the fine-tuned models
|
|
93
|
+
|
|
94
|
+
After the job completes, list your fine-tuned models and evaluate them in the Crafter loop:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# List models
|
|
98
|
+
uv run python - <<'PY'
|
|
99
|
+
import asyncio
|
|
100
|
+
import os
|
|
101
|
+
from synth_ai.learning.client import LearningClient
|
|
102
|
+
|
|
103
|
+
backend = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api")
|
|
104
|
+
api_key = os.getenv("SYNTH_API_KEY", "")
|
|
105
|
+
async def main():
|
|
106
|
+
client = LearningClient(backend, api_key)
|
|
107
|
+
models = await client.list_fine_tuned_models()
|
|
108
|
+
for m in models:
|
|
109
|
+
print(m)
|
|
110
|
+
asyncio.run(main())
|
|
111
|
+
PY
|
|
112
|
+
|
|
113
|
+
# Evaluate in the Crafter eval loop (example via warming_up_to_rl)
|
|
114
|
+
TASK_APP_URL=http://localhost:8001 \
|
|
115
|
+
uv run python examples/warming_up_to_rl/run_eval.py \
|
|
116
|
+
--toml examples/warming_up_to_rl/configs/eval_local_vllm.toml \
|
|
117
|
+
--model ft:YOUR_FT_MODEL_ID \
|
|
118
|
+
--use-rollout
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
### 4) Plan comparison runs
|
|
124
|
+
|
|
125
|
+
Keep runs comparable by adjusting one axis at a time and logging the settings in your experiment tracker (spreadsheet, Weights & Biases, etc.).
|
|
126
|
+
|
|
127
|
+
- **LoRA rank sweeps:** start from `crafter_lora_qwen0p6b.toml`, clone it per rank (e.g., `r=4,8,16,64`). For now add the desired `lora_rank` in the job payload manually (see note above) and include it in the run name.
|
|
128
|
+
- **Dataset size:** duplicate the exported JSONL and slice with `head -n`, or pass `--examples N` to the CLI for quick subsamples. Track the effective token count using the exporter logs.
|
|
129
|
+
- **Data quality:** increase `--min-unique`, require specific achievements, or exclude low-reward sessions with `export_trace_sft.py`. Capture the filter tuple in your run metadata so evaluations stay reproducible.
|
|
130
|
+
- **FFT vs LoRA:** run both configs on the same dataset/cardinality so differences reflect the training method rather than the data.
|
|
131
|
+
|
|
132
|
+
For each sweep, use consistent evaluation seeds and write down throughput (tokens/sec) so you can weigh quality vs cost.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
### Files
|
|
137
|
+
- `configs/crafter_lora_qwen0p6b.toml`: LoRA/QLoRA SFT config for `Qwen/Qwen3-0.6B`.
|
|
138
|
+
- `configs/crafter_fft_qwen0p6b.toml`: Full-finetune SFT config for `Qwen/Qwen3-0.6B`.
|
|
139
|
+
- `ft_data/`: place your exported JSONL here (ignored by VCS).
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[job]
|
|
2
|
+
model = "Qwen/Qwen3-0.6B"
|
|
3
|
+
# Prefer passing --dataset at runtime for repeatability
|
|
4
|
+
# data = "examples/sft/ft_data/crafter_traces.jsonl"
|
|
5
|
+
|
|
6
|
+
[compute]
|
|
7
|
+
gpu_type = "H100"
|
|
8
|
+
gpu_count = 1
|
|
9
|
+
nodes = 1
|
|
10
|
+
|
|
11
|
+
[data]
|
|
12
|
+
topology = {}
|
|
13
|
+
# Optional validation set if you have one locally
|
|
14
|
+
# validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
|
|
15
|
+
|
|
16
|
+
[training]
|
|
17
|
+
mode = "sft_offline"
|
|
18
|
+
use_qlora = false
|
|
19
|
+
|
|
20
|
+
[training.validation]
|
|
21
|
+
enabled = true
|
|
22
|
+
evaluation_strategy = "steps"
|
|
23
|
+
eval_steps = 50
|
|
24
|
+
save_best_model_at_end = true
|
|
25
|
+
metric_for_best_model = "val.loss"
|
|
26
|
+
greater_is_better = false
|
|
27
|
+
|
|
28
|
+
[hyperparameters]
|
|
29
|
+
n_epochs = 1
|
|
30
|
+
train_kind = "fft"
|
|
31
|
+
per_device_batch = 1
|
|
32
|
+
gradient_accumulation_steps = 32
|
|
33
|
+
sequence_length = 4096
|
|
34
|
+
learning_rate = 1e-5
|
|
35
|
+
warmup_ratio = 0.03
|
|
36
|
+
weight_decay = 0.01
|
|
37
|
+
|
|
38
|
+
[hyperparameters.parallelism]
|
|
39
|
+
use_deepspeed = true
|
|
40
|
+
deepspeed_stage = 2
|
|
41
|
+
fsdp = false
|
|
42
|
+
bf16 = true
|
|
43
|
+
fp16 = false
|
|
44
|
+
activation_checkpointing = true
|