synth_ai-0.2.9.dev4-py3-none-any.whl → synth_ai-0.2.9.dev6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synth-ai might be problematic.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
examples/rl/run_eval.py
CHANGED

```diff
@@ -5,27 +5,30 @@ from __future__ import annotations
 
 import argparse
 import asyncio
+import contextlib
 import json
 import os
-
-from typing import Any
+import tomllib
+from typing import Any
 
 import httpx
-import tomllib
 
 
 class TaskAppClient:
     """Minimal async client for math single-step task app."""
 
-    def __init__(self, base_url: str, api_key:
+    def __init__(self, base_url: str, api_key: str | None = None) -> None:
         self.base_url = base_url.rstrip("/")
         self.api_key = api_key
-        self._client:
+        self._client: httpx.AsyncClient | None = None
 
-    async def __aenter__(self) ->
+    async def __aenter__(self) -> TaskAppClient:
         headers = {"X-API-Key": self.api_key} if self.api_key else {}
         self._client = httpx.AsyncClient(
-            base_url=self.base_url,
+            base_url=self.base_url,
+            headers=headers,
+            timeout=httpx.Timeout(120.0),
+            follow_redirects=True,
         )
         return self
 
@@ -39,36 +42,37 @@ class TaskAppClient:
         if self._client is None:
             headers = {"X-API-Key": self.api_key} if self.api_key else {}
             self._client = httpx.AsyncClient(
-                base_url=self.base_url,
+                base_url=self.base_url,
+                headers=headers,
+                timeout=httpx.Timeout(120.0),
+                follow_redirects=True,
             )
         return self._client
 
-    async def initialize(self, split: str, seed: int | None) ->
-        payload:
+    async def initialize(self, split: str, seed: int | None) -> dict[str, Any]:
+        payload: dict[str, Any] = {"config": {"split": split}}
         if seed is not None:
             payload["seed"] = seed
         resp = await self.client.post("/env/math/initialize", json=payload)
         resp.raise_for_status()
         return resp.json()
 
-    async def step(self, env_id: str, tool_calls:
+    async def step(self, env_id: str, tool_calls: list[dict[str, Any]]) -> dict[str, Any]:
         payload = {"env_id": env_id, "action": {"tool_calls": tool_calls}}
         resp = await self.client.post("/env/math/step", json=payload)
         resp.raise_for_status()
         return resp.json()
 
     async def terminate(self, env_id: str) -> None:
-
+        with contextlib.suppress(Exception):
             await self.client.post("/env/math/terminate", json={"env_id": env_id})
-        except Exception:
-            pass
 
-    async def get_info(self) ->
+    async def get_info(self) -> dict[str, Any]:
         resp = await self.client.get("/info")
         resp.raise_for_status()
         return resp.json()
 
-    async def rollout(self, payload:
+    async def rollout(self, payload: dict[str, Any]) -> dict[str, Any]:
         resp = await self.client.post("/rollout", json=payload)
         resp.raise_for_status()
         return resp.json()
@@ -76,10 +80,10 @@ class TaskAppClient:
     async def post_inference(
         self,
         url: str,
-        payload:
+        payload: dict[str, Any],
         *,
-        headers:
-    ) ->
+        headers: dict[str, str] | None = None,
+    ) -> dict[str, Any]:
         async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as c:
             resp = await c.post(url, json=payload, headers=headers)
             resp.raise_for_status()
@@ -90,7 +94,7 @@ TOOL_NAME = "math_submit"
 DEFAULT_SPLIT = os.getenv("MATH_EVAL_DEFAULT_SPLIT", "validation")
 
 
-def _math_tool_schema() ->
+def _math_tool_schema() -> list[dict[str, Any]]:
     return [
         {
             "type": "function",
@@ -103,8 +107,7 @@ def _math_tool_schema() -> List[Dict[str, Any]]:
                         "answer": {
                             "type": "string",
                             "description": "Final answer in simplest form",
-                        }
-                        ,
+                        },
                         "explanation": {
                             "type": "string",
                             "description": "Optional explanation of reasoning",
@@ -118,7 +121,7 @@ def _math_tool_schema() -> List[Dict[str, Any]]:
     ]
 
 
-def _build_messages(problem: str) ->
+def _build_messages(problem: str) -> list[dict[str, Any]]:
     return [
         {
             "role": "system",
@@ -134,18 +137,18 @@ def _build_messages(problem: str) -> List[Dict[str, Any]]:
     ]
 
 
-def _parse_tool_calls(data:
+def _parse_tool_calls(data: dict[str, Any]) -> list[dict[str, Any]]:
     choices = data.get("choices") or []
     if not choices:
         return []
     message = choices[0].get("message") or {}
     raw_calls = message.get("tool_calls") or []
-    tool_calls:
+    tool_calls: list[dict[str, Any]] = []
     for call in raw_calls:
         function = call.get("function") or {}
         name = function.get("name")
         arguments = function.get("arguments")
-        parsed_args:
+        parsed_args: dict[str, Any]
         if isinstance(arguments, str):
            try:
                 parsed_args = json.loads(arguments)
@@ -159,7 +162,7 @@ def _parse_tool_calls(data: Dict[str, Any]) -> List[Dict[str, Any]]:
     return tool_calls
 
 
-def _detect_provider(model: str, hint:
+def _detect_provider(model: str, hint: str | None) -> str:
     if hint:
         return hint.lower()
     lowered = (model or "").lower()
@@ -188,10 +191,10 @@ async def _choose_actions(
     provider: str,
     model: str,
     problem: str,
-    policy_cfg:
-) ->
+    policy_cfg: dict[str, Any],
+) -> tuple[list[dict[str, Any]], dict[str, Any]]:
     messages = _build_messages(problem)
-    payload:
+    payload: dict[str, Any] = {
         "model": model,
         "messages": messages,
         "tools": _math_tool_schema(),
@@ -203,9 +206,7 @@ async def _choose_actions(
 
     if provider == "groq":
         # Task app proxies Groq requests; reuse existing headers on the client
-        response = await client.client.post(
-            "/proxy/groq/v1/chat/completions", json=payload
-        )
+        response = await client.client.post("/proxy/groq/v1/chat/completions", json=payload)
         response.raise_for_status()
         body = response.json()
     else:
@@ -223,23 +224,20 @@ async def _choose_actions(
                 headers=headers or None,
             )
         except httpx.ReadTimeout as exc:
-            raise RuntimeError(
-                "Inference request timed out. Check the inference service." ) from exc
+            raise RuntimeError("Inference request timed out. Check the inference service.") from exc
         try:
             body = response.json()
         except Exception:
             body = {"raw": response.text[:800]}
         if response.status_code >= 500:
-            raise RuntimeError(
-                f"Inference server error {response.status_code}: {body}")
+            raise RuntimeError(f"Inference server error {response.status_code}: {body}")
         if response.status_code >= 400:
-            raise RuntimeError(
-                f"Inference request invalid ({response.status_code}): {body}")
+            raise RuntimeError(f"Inference request invalid ({response.status_code}): {body}")
     tool_calls = _parse_tool_calls(body)
     return tool_calls, body
 
 
-def _tool_to_answer(tool_calls:
+def _tool_to_answer(tool_calls: list[dict[str, Any]]) -> str:
     if not tool_calls:
         return ""
     args = tool_calls[0].get("args") or {}
@@ -251,11 +249,11 @@ async def eval_episode(
     client: TaskAppClient,
     *,
     split: str,
-    seed:
+    seed: int | None,
     model: str,
     provider: str,
-    policy_cfg:
-) ->
+    policy_cfg: dict[str, Any],
+) -> dict[str, Any]:
     created = await client.initialize(split, seed)
     env_id = created["env_id"]
     observation = created.get("observation") or {}
@@ -288,10 +286,10 @@ async def eval_via_rollout(
     *,
     run_id: str,
     split: str,
-    seed:
+    seed: int | None,
     model: str,
-    policy_cfg:
-) ->
+    policy_cfg: dict[str, Any],
+) -> dict[str, Any]:
     payload = {
         "run_id": run_id,
         "env": {
@@ -314,6 +312,7 @@ async def eval_via_rollout(
     steps = traj.get("steps") or []
     step = steps[0] if steps else {}
     info = step.get("info") or {}
+    observation = step.get("obs") or {}
     return {
         "seed": seed,
         "split": split,
@@ -328,14 +327,14 @@ async def eval_via_rollout(
     }
 
 
-def _load_config(path:
+def _load_config(path: str | None) -> dict[str, Any]:
     if not path:
         return {}
     with open(path, "rb") as fh:
         return tomllib.load(fh)
 
 
-def _default_policy_cfg(cfg:
+def _default_policy_cfg(cfg: dict[str, Any]) -> dict[str, Any]:
     policy = dict(cfg.get("policy") or {})
     if "inference_url" not in policy:
         env_url = os.getenv("INFERENCE_URL")
@@ -371,15 +370,17 @@ async def main() -> None:
     seed_start = int(cfg.get("seed_start") or 0)
 
     policy_cfg = _default_policy_cfg(cfg)
-    provider_hint =
+    provider_hint = (
+        cfg.get("provider") or cfg.get("policy", {}).get("provider") or policy_cfg.get("provider")
+    )
     provider = _detect_provider(model, provider_hint)
     policy_cfg.pop("provider", None)
 
     api_key = os.getenv("ENVIRONMENT_API_KEY")
 
     successes = 0
-    failures:
-    results:
+    failures: dict[str, int] = {}
+    results: list[dict[str, Any]] = []
 
     async with TaskAppClient(task_app_url, api_key=api_key) as client:
         for episode in range(episodes):
@@ -412,7 +413,7 @@ async def main() -> None:
             problem = data.get("problem")
             tool_calls = data.get("tool_calls") or []
             print(
-                f"Episode {episode+1}/{episodes} seed={seed} status={status} reward={data.get('reward')}\n"
+                f"Episode {episode + 1}/{episodes} seed={seed} status={status} reward={data.get('reward')}\n"
                 f" problem: {problem!r}\n"
                 f" tool : {tool_calls!r}\n"
                 f" answer : {answer!r}\n expected: {expected!r}",
```
examples/rl/run_rl_and_save.py
CHANGED

```diff
@@ -7,14 +7,14 @@ import argparse
 import json
 import os
 import sys
+import tomllib
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import requests
-import tomllib
 
 
-def _load_toml(path: Path) ->
+def _load_toml(path: Path) -> dict[str, Any]:
     if not path.exists():
         print(f"config not found: {path}", file=sys.stderr)
         sys.exit(2)
@@ -24,10 +24,18 @@ def _load_toml(path: Path) -> Dict[str, Any]:
 
 def main() -> None:
     parser = argparse.ArgumentParser(description="Create math RL job via backend RL endpoint")
-    parser.add_argument(
+    parser.add_argument(
+        "--backend", default=os.getenv("BACKEND_BASE_URL", "http://localhost:8000/api")
+    )
     parser.add_argument("--config", required=True, help="Path to RL TOML config")
-    parser.add_argument(
-
+    parser.add_argument(
+        "--task-url", default=os.getenv("TASK_APP_URL", ""), help="Override task service URL"
+    )
+    parser.add_argument(
+        "--idempotency",
+        default=os.getenv("RL_IDEMPOTENCY_KEY", ""),
+        help="Optional Idempotency-Key header",
+    )
     args = parser.parse_args()
 
     cfg_path = Path(args.config).expanduser()
@@ -35,19 +43,29 @@ def main() -> None:
 
     services = cfg.get("services") if isinstance(cfg.get("services"), dict) else {}
 
-    task_url = (
+    task_url = (
+        (args.task_url or "").strip()
+        or (os.getenv("TASK_APP_URL") or "").strip()
+        or (services.get("task_url") or "").strip()
+    )
     if not task_url:
-        print(
+        print(
+            "Missing task service URL. Provide --task-url or set TASK_APP_URL or services.task_url in TOML",
+            file=sys.stderr,
+        )
         sys.exit(2)
 
     model_cfg = cfg.get("model") if isinstance(cfg.get("model"), dict) else {}
     has_source = bool((model_cfg.get("source") or "").strip())
     has_base = bool((model_cfg.get("base") or "").strip())
     if has_source == has_base:
-        print(
+        print(
+            "Model section must specify exactly one of [model].source or [model].base",
+            file=sys.stderr,
+        )
         sys.exit(2)
 
-    payload:
+    payload: dict[str, Any] = {
         "job_type": "rl",
         "compute": cfg.get("compute", {}),
         "data": {
@@ -59,7 +77,7 @@ def main() -> None:
 
     backend = str(args.backend).rstrip("/")
     url = f"{backend}/rl/jobs"
-    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("
+    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("SYNTH_KEY") or "").strip()
     if not api_key:
         print("Missing SYNTH_API_KEY in env", file=sys.stderr)
         sys.exit(2)
@@ -91,4 +109,3 @@ def main() -> None:
 
 if __name__ == "__main__":
     main()
-
```