synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Legacy entrypoint for the math single-step task app."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fastapi.exceptions import RequestValidationError
|
|
9
|
+
from fastapi.responses import JSONResponse
|
|
10
|
+
from starlette.requests import Request
|
|
11
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
12
|
+
from synth_ai.task.server import create_task_app, run_task_app
|
|
13
|
+
|
|
14
|
+
from .math_single_step import build_config
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def fastapi_app():
|
|
18
|
+
"""Return a FastAPI application for hosting the math task app."""
|
|
19
|
+
|
|
20
|
+
app = create_task_app(build_config())
|
|
21
|
+
|
|
22
|
+
# Replace default health endpoints with auth-tolerant handlers.
|
|
23
|
+
filtered_routes = []
|
|
24
|
+
for route in app.router.routes:
|
|
25
|
+
path = getattr(route, "path", None)
|
|
26
|
+
methods = getattr(route, "methods", set()) or set()
|
|
27
|
+
if path in {"/health", "/health/rollout"} and "GET" in methods:
|
|
28
|
+
continue
|
|
29
|
+
filtered_routes.append(route)
|
|
30
|
+
app.router.routes = filtered_routes
|
|
31
|
+
|
|
32
|
+
def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
|
|
33
|
+
if not env_key:
|
|
34
|
+
return None
|
|
35
|
+
prefix = env_key[: max(1, len(env_key) // 2)]
|
|
36
|
+
print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
|
|
37
|
+
return prefix
|
|
38
|
+
|
|
39
|
+
@app.get("/health")
|
|
40
|
+
async def health(request: Request):
|
|
41
|
+
env_key = normalize_environment_api_key()
|
|
42
|
+
if not env_key:
|
|
43
|
+
return JSONResponse(
|
|
44
|
+
status_code=503,
|
|
45
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
46
|
+
)
|
|
47
|
+
if not is_api_key_header_authorized(request):
|
|
48
|
+
prefix = _log_env_key_prefix("health", env_key)
|
|
49
|
+
content = {"status": "healthy", "authorized": False}
|
|
50
|
+
if prefix:
|
|
51
|
+
content["expected_api_key_prefix"] = prefix
|
|
52
|
+
return JSONResponse(status_code=200, content=content)
|
|
53
|
+
return {"status": "healthy", "authorized": True}
|
|
54
|
+
|
|
55
|
+
@app.get("/health/rollout")
|
|
56
|
+
async def health_rollout(request: Request):
|
|
57
|
+
env_key = normalize_environment_api_key()
|
|
58
|
+
if not env_key:
|
|
59
|
+
return JSONResponse(
|
|
60
|
+
status_code=503,
|
|
61
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
62
|
+
)
|
|
63
|
+
if not is_api_key_header_authorized(request):
|
|
64
|
+
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
65
|
+
content = {"status": "healthy", "authorized": False}
|
|
66
|
+
if prefix:
|
|
67
|
+
content["expected_api_key_prefix"] = prefix
|
|
68
|
+
return JSONResponse(status_code=200, content=content)
|
|
69
|
+
return {"ok": True, "authorized": True}
|
|
70
|
+
|
|
71
|
+
@app.exception_handler(RequestValidationError)
|
|
72
|
+
async def _on_validation_error(request: Request, exc: RequestValidationError):
|
|
73
|
+
try:
|
|
74
|
+
hdr = request.headers
|
|
75
|
+
snapshot = {
|
|
76
|
+
"path": str(request.url.path),
|
|
77
|
+
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
78
|
+
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
79
|
+
"have_authorization": bool(hdr.get("authorization")),
|
|
80
|
+
"errors": exc.errors()[:5],
|
|
81
|
+
}
|
|
82
|
+
print("[422] validation", snapshot, flush=True)
|
|
83
|
+
except Exception:
|
|
84
|
+
pass
|
|
85
|
+
return JSONResponse(
|
|
86
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
return app
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
|
|
93
|
+
parser = argparse.ArgumentParser(description="Run the math single-step task app locally")
|
|
94
|
+
parser.add_argument("--host", default="0.0.0.0")
|
|
95
|
+
parser.add_argument("--port", type=int, default=8101)
|
|
96
|
+
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
97
|
+
parser.add_argument(
|
|
98
|
+
"--env-file",
|
|
99
|
+
action="append",
|
|
100
|
+
default=[],
|
|
101
|
+
help="Path to .env file to load (can be specified multiple times)",
|
|
102
|
+
)
|
|
103
|
+
args = parser.parse_args()
|
|
104
|
+
|
|
105
|
+
run_task_app(
|
|
106
|
+
build_config,
|
|
107
|
+
host=args.host,
|
|
108
|
+
port=args.port,
|
|
109
|
+
reload=args.reload,
|
|
110
|
+
env_files=args.env_file or [],
|
|
111
|
+
)
|
examples/sft/README.md
CHANGED
|
@@ -27,7 +27,7 @@ You can generate traces with the Crafter task app and then export them to SFT JS
|
|
|
27
27
|
# Serve the task app locally with tracing enabled (example)
|
|
28
28
|
uvx synth-ai serve grpo-crafter \
|
|
29
29
|
--trace traces/v3 \
|
|
30
|
-
--trace-db traces/v3/
|
|
30
|
+
--trace-db traces/v3/task_app_traces_<timestamp>.db \
|
|
31
31
|
--port 8001
|
|
32
32
|
|
|
33
33
|
# Or run traced local rollouts to accumulate data
|
|
@@ -36,9 +36,9 @@ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
|
|
|
36
36
|
|
|
37
37
|
# Export SFT dataset from the trace DB
|
|
38
38
|
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
39
|
-
--db traces/v3/
|
|
39
|
+
--db traces/v3/task_app_traces_<timestamp>.db \
|
|
40
40
|
--min-unique 0 \
|
|
41
|
-
--output examples/sft/ft_data/
|
|
41
|
+
--output examples/sft/ft_data/crafter_sft.jsonl
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
Notes:
|
|
@@ -56,7 +56,7 @@ Use the standard CLI. Do not use a custom Python finetuning script. Point the CL
|
|
|
56
56
|
uvx synth-ai train \
|
|
57
57
|
--type sft \
|
|
58
58
|
--config examples/sft/configs/crafter_lora_qwen0p6b.toml \
|
|
59
|
-
--dataset examples/sft/ft_data/
|
|
59
|
+
--dataset examples/sft/ft_data/crafter_sft.jsonl \
|
|
60
60
|
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
61
61
|
```
|
|
62
62
|
|
|
@@ -76,7 +76,7 @@ Full finetuning updates all weights and uses a near-identical CLI flow with the
|
|
|
76
76
|
uvx synth-ai train \
|
|
77
77
|
--type sft \
|
|
78
78
|
--config examples/sft/configs/crafter_fft_qwen0p6b.toml \
|
|
79
|
-
--dataset examples/sft/ft_data/
|
|
79
|
+
--dataset examples/sft/ft_data/crafter_sft.jsonl \
|
|
80
80
|
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
81
81
|
```
|
|
82
82
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
type = "sft"
|
|
2
|
+
|
|
1
3
|
[job]
|
|
2
4
|
model = "Qwen/Qwen3-0.6B"
|
|
3
5
|
# Prefer passing --dataset at runtime for repeatability
|
|
4
|
-
# data = "examples/sft/ft_data/
|
|
6
|
+
# data = "examples/sft/ft_data/crafter_sft.jsonl"
|
|
5
7
|
|
|
6
8
|
[compute]
|
|
7
9
|
gpu_type = "H100"
|
|
@@ -11,7 +13,7 @@ nodes = 1
|
|
|
11
13
|
[data]
|
|
12
14
|
topology = {}
|
|
13
15
|
# Optional validation set if you have one locally
|
|
14
|
-
# validation_path = "examples/sft/ft_data/
|
|
16
|
+
# validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
|
|
15
17
|
|
|
16
18
|
[training]
|
|
17
19
|
mode = "sft_offline"
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
type = "sft"
|
|
2
|
+
|
|
1
3
|
[job]
|
|
2
4
|
model = "Qwen/Qwen3-0.6B"
|
|
3
5
|
# Optionally set here, but prefer passing --dataset at runtime
|
|
4
|
-
# data = "examples/sft/ft_data/
|
|
6
|
+
# data = "examples/sft/ft_data/crafter_sft.jsonl"
|
|
5
7
|
|
|
6
8
|
[compute]
|
|
7
9
|
gpu_type = "H100"
|
|
@@ -12,7 +14,7 @@ nodes = 1
|
|
|
12
14
|
# Forwarded into metadata.effective_config
|
|
13
15
|
topology = {}
|
|
14
16
|
# Optional validation set if you have one locally
|
|
15
|
-
# validation_path = "examples/sft/ft_data/
|
|
17
|
+
# validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
|
|
16
18
|
|
|
17
19
|
[training]
|
|
18
20
|
mode = "lora"
|
|
@@ -42,4 +44,3 @@ fsdp = false
|
|
|
42
44
|
bf16 = true
|
|
43
45
|
fp16 = false
|
|
44
46
|
activation_checkpointing = true
|
|
45
|
-
|
examples/sft/evaluate.py
CHANGED
|
@@ -11,6 +11,7 @@ from __future__ import annotations
|
|
|
11
11
|
import argparse
|
|
12
12
|
import asyncio
|
|
13
13
|
import os
|
|
14
|
+
from contextlib import suppress
|
|
14
15
|
from dataclasses import dataclass
|
|
15
16
|
from typing import Any
|
|
16
17
|
|
|
@@ -44,6 +45,7 @@ def _ops(n: int) -> list[str]:
|
|
|
44
45
|
|
|
45
46
|
|
|
46
47
|
def _request(seed: int, a: EvalArgs) -> RolloutRequest:
|
|
48
|
+
from synth_ai.task.contracts import RolloutMode
|
|
47
49
|
return RolloutRequest(
|
|
48
50
|
run_id=f"eval-{seed}",
|
|
49
51
|
env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
|
|
@@ -53,6 +55,7 @@ def _request(seed: int, a: EvalArgs) -> RolloutRequest:
|
|
|
53
55
|
),
|
|
54
56
|
ops=_ops(a.max_llm_calls),
|
|
55
57
|
record=RolloutRecordConfig(trajectories=True, return_trace=False, trace_format="compact"),
|
|
58
|
+
mode=RolloutMode.EVAL,
|
|
56
59
|
)
|
|
57
60
|
|
|
58
61
|
|
|
@@ -102,10 +105,8 @@ async def main() -> None:
|
|
|
102
105
|
for r in results:
|
|
103
106
|
ers = r.get("episode_returns") or []
|
|
104
107
|
if isinstance(ers, list) and ers:
|
|
105
|
-
|
|
108
|
+
with suppress(Exception):
|
|
106
109
|
flat_returns.append(float(ers[0]))
|
|
107
|
-
except Exception:
|
|
108
|
-
pass
|
|
109
110
|
if flat_returns:
|
|
110
111
|
mean_ret = sum(flat_returns) / len(flat_returns)
|
|
111
112
|
print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
|
|
@@ -114,4 +115,3 @@ async def main() -> None:
|
|
|
114
115
|
if __name__ == "__main__":
|
|
115
116
|
asyncio.run(main())
|
|
116
117
|
|
|
117
|
-
|
examples/sft/export_dataset.py
CHANGED
|
@@ -20,12 +20,17 @@ from examples.warming_up_to_rl.export_trace_sft import (
|
|
|
20
20
|
parse_event_filters,
|
|
21
21
|
write_jsonl,
|
|
22
22
|
)
|
|
23
|
+
from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def main() -> None:
|
|
26
27
|
p = argparse.ArgumentParser(description=__doc__)
|
|
27
|
-
p.add_argument(
|
|
28
|
-
|
|
28
|
+
p.add_argument(
|
|
29
|
+
"--db",
|
|
30
|
+
type=Path,
|
|
31
|
+
default=TRACE_DB_DIR / canonical_trace_db_name(),
|
|
32
|
+
)
|
|
33
|
+
p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_sft.jsonl"))
|
|
29
34
|
p.add_argument("--model", action="append", dest="models")
|
|
30
35
|
p.add_argument("--provider", action="append", dest="providers")
|
|
31
36
|
p.add_argument("--min-unique", type=int, default=0)
|
|
@@ -113,5 +118,3 @@ def main() -> None:
|
|
|
113
118
|
|
|
114
119
|
if __name__ == "__main__":
|
|
115
120
|
main()
|
|
116
|
-
|
|
117
|
-
|
examples/sft/generate_traces.py
CHANGED
|
@@ -42,6 +42,7 @@ def _build_ops(max_llm_calls: int) -> list[str]:
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_key: str, *, max_llm_calls: int, return_trace: bool) -> RolloutRequest:
|
|
45
|
+
from synth_ai.task.contracts import RolloutMode
|
|
45
46
|
policy_cfg: dict[str, Any] = {
|
|
46
47
|
"model": model,
|
|
47
48
|
"inference_url": inference_url,
|
|
@@ -54,6 +55,7 @@ def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_k
|
|
|
54
55
|
policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_cfg),
|
|
55
56
|
ops=_build_ops(max_llm_calls),
|
|
56
57
|
record=record,
|
|
58
|
+
mode=RolloutMode.EVAL,
|
|
57
59
|
)
|
|
58
60
|
|
|
59
61
|
|
examples/swe/task_app/README.md
CHANGED
|
@@ -38,7 +38,7 @@ uvx synth-ai serve swe-mini \
|
|
|
38
38
|
--port 8020 \
|
|
39
39
|
--env-file .env \
|
|
40
40
|
--trace traces/v3 \
|
|
41
|
-
--trace-db traces/v3/
|
|
41
|
+
--trace-db traces/v3/task_app_traces_<timestamp>.db
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
|
|
@@ -484,6 +484,7 @@ def build_config() -> TaskAppConfig:
|
|
|
484
484
|
|
|
485
485
|
legacy_request = LegacyRolloutRequest(
|
|
486
486
|
run_id=request.run_id,
|
|
487
|
+
mode=request.mode, # Preserve mode for nested requests
|
|
487
488
|
env=LegacyRolloutEnvSpec(
|
|
488
489
|
env_id=request.env.env_id,
|
|
489
490
|
env_name=env_spec.env_name or "swe-mini",
|
|
@@ -555,7 +556,6 @@ register_task_app(
|
|
|
555
556
|
description="mini-swe-agent task app with rollout + proxy endpoints",
|
|
556
557
|
config_factory=build_config,
|
|
557
558
|
aliases=("mini-swe", "swe-mini-task"),
|
|
558
|
-
env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
|
|
559
559
|
modal=ModalDeploymentConfig(
|
|
560
560
|
app_name="swe-mini-task-app",
|
|
561
561
|
python_version="3.11",
|
|
@@ -114,23 +114,11 @@ if __name__ == "__main__":
|
|
|
114
114
|
parser.add_argument("--host", default="0.0.0.0")
|
|
115
115
|
parser.add_argument("--port", type=int, default=8020)
|
|
116
116
|
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
117
|
-
parser.add_argument(
|
|
118
|
-
"--env-file",
|
|
119
|
-
action="append",
|
|
120
|
-
default=[],
|
|
121
|
-
help="Additional .env files to load before startup",
|
|
122
|
-
)
|
|
123
117
|
args = parser.parse_args()
|
|
124
118
|
|
|
125
|
-
default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
|
|
126
|
-
env_files = [str(default_env)] if default_env.exists() else []
|
|
127
|
-
env_files.extend(args.env_file or [])
|
|
128
|
-
|
|
129
119
|
run_task_app(
|
|
130
120
|
build_task_app_config,
|
|
131
121
|
host=args.host,
|
|
132
122
|
port=args.port,
|
|
133
123
|
reload=args.reload,
|
|
134
|
-
env_files=env_files,
|
|
135
124
|
)
|
|
136
|
-
|
|
@@ -776,7 +776,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
776
776
|
or os.getenv("SWE_REX_MODAL_SANDBOX_KWARGS")
|
|
777
777
|
)
|
|
778
778
|
modal_kwargs: dict[str, Any] = {}
|
|
779
|
-
if isinstance(modal_kwargs_raw,
|
|
779
|
+
if isinstance(modal_kwargs_raw, dict | list):
|
|
780
780
|
modal_kwargs = dict(modal_kwargs_raw or {})
|
|
781
781
|
elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
|
|
782
782
|
try:
|
|
@@ -841,9 +841,9 @@ class MiniSweEnvironmentWrapper:
|
|
|
841
841
|
instance_image_tag=instance_image_tag,
|
|
842
842
|
env_image_tag=env_image_tag,
|
|
843
843
|
model_name=model_name,
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
844
|
+
command_cls=Command,
|
|
845
|
+
write_file_request_cls=WriteFileRequest,
|
|
846
|
+
read_file_request_cls=ReadFileRequest,
|
|
847
847
|
)
|
|
848
848
|
try:
|
|
849
849
|
return self._run_coroutine_blocking(coro)
|
|
@@ -867,9 +867,9 @@ class MiniSweEnvironmentWrapper:
|
|
|
867
867
|
instance_image_tag: str,
|
|
868
868
|
env_image_tag: str,
|
|
869
869
|
model_name: str,
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
870
|
+
command_cls,
|
|
871
|
+
write_file_request_cls,
|
|
872
|
+
read_file_request_cls,
|
|
873
873
|
) -> dict[str, Any]:
|
|
874
874
|
deployment = deployment_config.get_deployment()
|
|
875
875
|
await deployment.start()
|
|
@@ -880,7 +880,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
880
880
|
|
|
881
881
|
# Ensure working directory exists.
|
|
882
882
|
mkdir_resp = await runtime.execute(
|
|
883
|
-
|
|
883
|
+
command_cls(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
|
|
884
884
|
)
|
|
885
885
|
if mkdir_resp.exit_code not in (0, None):
|
|
886
886
|
logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_resp.exit_code)
|
|
@@ -888,8 +888,8 @@ class MiniSweEnvironmentWrapper:
|
|
|
888
888
|
# Upload dataset & predictions.
|
|
889
889
|
dataset_blob = json.dumps([instance], ensure_ascii=False)
|
|
890
890
|
predictions_blob = json.dumps({instance_id: prediction}, ensure_ascii=False)
|
|
891
|
-
await runtime.write_file(
|
|
892
|
-
await runtime.write_file(
|
|
891
|
+
await runtime.write_file(write_file_request_cls(path=dataset_remote_path, content=dataset_blob))
|
|
892
|
+
await runtime.write_file(write_file_request_cls(path=predictions_remote_path, content=predictions_blob))
|
|
893
893
|
|
|
894
894
|
eval_cmd = [
|
|
895
895
|
"python",
|
|
@@ -921,7 +921,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
921
921
|
|
|
922
922
|
command_timeout = max(eval_timeout + 900, 1200)
|
|
923
923
|
response = await runtime.execute(
|
|
924
|
-
|
|
924
|
+
command_cls(
|
|
925
925
|
command=eval_cmd,
|
|
926
926
|
timeout=command_timeout,
|
|
927
927
|
cwd=remote_root,
|
|
@@ -945,7 +945,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
945
945
|
for filename in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
|
|
946
946
|
remote_path = f"{remote_log_dir}/{filename}"
|
|
947
947
|
try:
|
|
948
|
-
content = await runtime.read_file(
|
|
948
|
+
content = await runtime.read_file(read_file_request_cls(path=remote_path))
|
|
949
949
|
except Exception:
|
|
950
950
|
continue
|
|
951
951
|
if getattr(content, "content", None):
|
|
@@ -1073,7 +1073,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
1073
1073
|
return value
|
|
1074
1074
|
if isinstance(value, str):
|
|
1075
1075
|
return value.strip().lower() in {"1", "true", "yes", "on"}
|
|
1076
|
-
if isinstance(value,
|
|
1076
|
+
if isinstance(value, int | float):
|
|
1077
1077
|
return bool(value)
|
|
1078
1078
|
return False # pragma: no cover - defensive default
|
|
1079
1079
|
|
|
@@ -343,8 +343,6 @@ async def step_policy(
|
|
|
343
343
|
inf_req = meta["inference_request"]
|
|
344
344
|
msgs = inf_req["messages"]
|
|
345
345
|
model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
|
|
346
|
-
system_messages: list[str] = []
|
|
347
|
-
user_messages: list[str] = []
|
|
348
346
|
if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
|
|
349
347
|
sys_text = msgs[0]["content"]
|
|
350
348
|
policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
|
|
@@ -12,6 +12,7 @@ from fastapi import APIRouter, HTTPException, Request, status
|
|
|
12
12
|
from pydantic import BaseModel
|
|
13
13
|
from synth_ai.lm.vendors.base import BaseLMResponse
|
|
14
14
|
from synth_ai.task.tracing_utils import unique_sft_path
|
|
15
|
+
from synth_ai.task.contracts import RolloutMode
|
|
15
16
|
from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
|
|
16
17
|
from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
|
|
17
18
|
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
@@ -120,6 +121,7 @@ class RolloutRequest(BaseModel):
|
|
|
120
121
|
# Optional run/session context
|
|
121
122
|
training_session_id: str | None = None
|
|
122
123
|
synth_base_url: str | None = None
|
|
124
|
+
mode: RolloutMode # Required: explicit RL vs EVAL mode
|
|
123
125
|
|
|
124
126
|
|
|
125
127
|
class RolloutStep(BaseModel):
|
|
@@ -886,14 +888,6 @@ async def execute_rollout(
|
|
|
886
888
|
logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
|
|
887
889
|
tracing_context = RolloutTracingContext(tracer_instance, request, req)
|
|
888
890
|
await tracing_context.start_session()
|
|
889
|
-
# Print whether tracing is active for this rollout
|
|
890
|
-
try:
|
|
891
|
-
print(
|
|
892
|
-
f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
|
|
893
|
-
flush=True,
|
|
894
|
-
)
|
|
895
|
-
except Exception:
|
|
896
|
-
pass
|
|
897
891
|
|
|
898
892
|
# Register run
|
|
899
893
|
registry.register_run(request.run_id)
|