synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Lightweight Modal deploy wrapper for Banking77 task app (web)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# Modal is only needed at deploy time; exit with a readable message instead of
# a traceback when it cannot be imported.
try:
    import modal  # type: ignore
except Exception as exc:  # pragma: no cover
    raise SystemExit(f"Modal is required to deploy: {exc}")

# The repo root is the fourth ancestor of this file; fall back to the current
# working directory when the path is too shallow to have one.
_here = Path(__file__).resolve()
_parents = list(_here.parents)
if len(_parents) > 3:
    REPO_ROOT = _parents[3]
else:
    REPO_ROOT = Path.cwd()

app = modal.App("synth-banking77-web")

# Build the container image step by step: base image, Python dependencies,
# import path, then the local `synth_ai` and `examples` trees copied in.
_image = modal.Image.debian_slim(python_version="3.11")
_image = _image.pip_install(
    "synth-ai",
    "datasets>=2.14.0",
    "fastapi>=0.115.0",
    "pydantic>=2.0.0",
    "httpx>=0.26.0",
    "python-dotenv>=1.0.0",
)
_image = _image.env({"PYTHONPATH": "/opt/synth_ai_repo"})
_image = _image.add_local_dir(
    str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai", copy=True
)
_image = _image.add_local_dir(
    str(REPO_ROOT / "examples"), "/opt/synth_ai_repo/examples", copy=True
)

# Ship the repo-level .env alongside the code only when one exists locally.
_env_file = REPO_ROOT / ".env"
if _env_file.exists():
    _image = _image.add_local_file(str(_env_file), "/opt/synth_ai_repo/.env")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@app.function(image=_image, timeout=600)
@modal.asgi_app()
def web():
    """Build and return the Banking77 FastAPI application for Modal to serve.

    Environment variables are loaded from a ``.env`` file on a best-effort
    basis before the task app is imported; any failure while loading them
    (including ``python-dotenv`` being unavailable) is ignored.

    Returns:
        The ASGI application produced by ``fastapi_app()``.
    """
    import contextlib

    with contextlib.suppress(Exception):
        from dotenv import load_dotenv  # type: ignore

        # The image places the .env copy at /opt/synth_ai_repo/.env, while
        # REPO_ROOT is derived from __file__ (or the cwd) and may resolve to a
        # different directory inside the container. Try the container copy
        # first, then the original REPO_ROOT location. override=False means
        # values already present in the environment are never replaced.
        for env_path in (Path("/opt/synth_ai_repo/.env"), REPO_ROOT / ".env"):
            if env_path.is_file():
                load_dotenv(str(env_path), override=False)

    # Imported lazily so that merely loading this wrapper locally does not
    # pull in the task app's heavier dependencies.
    from examples.task_apps.banking77.banking77_task_app import fastapi_app  # type: ignore

    return fastapi_app()
|
|
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
|
|
|
6
6
|
|
|
7
7
|
## Local development
|
|
8
8
|
```bash
|
|
9
|
-
uvx synth-ai
|
|
9
|
+
uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
|
|
10
10
|
# Optional extras:
|
|
11
11
|
# --env-file path/to/.env # load additional environment variables
|
|
12
12
|
# --reload # enable uvicorn auto-reload
|
|
@@ -6,12 +6,17 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
|
+
from urllib.parse import parse_qs, urlparse
|
|
9
10
|
from collections.abc import Iterable, Sequence
|
|
10
11
|
from contextlib import suppress
|
|
11
12
|
from dataclasses import dataclass
|
|
13
|
+
from datetime import UTC, datetime
|
|
12
14
|
from pathlib import Path
|
|
13
15
|
from typing import Any
|
|
14
16
|
|
|
17
|
+
from fastapi import HTTPException
|
|
18
|
+
from pydantic import BaseModel
|
|
19
|
+
|
|
15
20
|
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
16
21
|
from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
|
|
17
22
|
from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
|
|
@@ -37,7 +42,16 @@ except Exception: # pragma: no cover - utils unavailable if optional deps missi
|
|
|
37
42
|
"""Fallback to shared utility for URL normalization."""
|
|
38
43
|
return normalize_inference_url(raw_url) if raw_url else raw_url
|
|
39
44
|
|
|
40
|
-
def extract_trace_correlation_id(_raw_url):
|
|
45
|
+
def extract_trace_correlation_id(_raw_url, mode=None):
    """Return the trace correlation id carried in a URL's query string.

    The query parameters ``cid``, ``trace`` and ``trace_correlation_id`` are
    checked in that order; the first non-blank value (after stripping
    surrounding whitespace) is returned.

    Args:
        _raw_url: Candidate inference URL. Anything that is not a ``str``
            yields ``None``.
        mode: Accepted so callers can pass the rollout mode; it is not used
            by this implementation.

    Returns:
        The stripped correlation id, or ``None`` when the URL is not a
        string, cannot be parsed, or carries no usable id.
    """
    if not isinstance(_raw_url, str):
        return None
    try:
        query_params = parse_qs(urlparse(_raw_url).query or "")
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced "[" in the
        # host); treat an unparseable URL as "no correlation id".
        return None
    for key in ("cid", "trace", "trace_correlation_id"):
        for value in query_params.get(key) or []:
            if isinstance(value, str) and value.strip():
                return value.strip()
    return None
|
|
42
56
|
logger = logging.getLogger(__name__)
|
|
43
57
|
|
|
@@ -651,12 +665,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
|
|
|
651
665
|
if stripped:
|
|
652
666
|
return stripped
|
|
653
667
|
|
|
654
|
-
return extract_trace_correlation_id(policy_cfg.get("inference_url"))
|
|
668
|
+
return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
|
|
655
669
|
|
|
656
670
|
|
|
657
671
|
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
|
|
658
672
|
request = _coerce_math_to_crafter(request)
|
|
659
673
|
|
|
674
|
+
record_cfg = request.record.model_copy(
|
|
675
|
+
update={
|
|
676
|
+
"return_trace": True,
|
|
677
|
+
"trace_format": "structured",
|
|
678
|
+
}
|
|
679
|
+
)
|
|
680
|
+
request = request.model_copy(update={"record": record_cfg})
|
|
681
|
+
|
|
660
682
|
policy_cfg = dict(request.policy.config or {})
|
|
661
683
|
logger.info(
|
|
662
684
|
"ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
|
|
@@ -800,11 +822,49 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
800
822
|
trace_correlation_id,
|
|
801
823
|
)
|
|
802
824
|
data = legacy_response.model_dump()
|
|
825
|
+
legacy_trace = getattr(legacy_response, "trace", None)
|
|
826
|
+
if legacy_trace is not None:
|
|
827
|
+
if isinstance(legacy_trace, dict):
|
|
828
|
+
legacy_trace_preview = list(legacy_trace.keys())[:5]
|
|
829
|
+
else:
|
|
830
|
+
legacy_trace_preview = type(legacy_trace)
|
|
831
|
+
logger.info(
|
|
832
|
+
"ROLLOUT_EXEC: legacy response trace present type=%s preview=%s",
|
|
833
|
+
type(legacy_trace),
|
|
834
|
+
legacy_trace_preview,
|
|
835
|
+
)
|
|
836
|
+
logger.debug(
|
|
837
|
+
"ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
|
|
838
|
+
sorted(data.keys()),
|
|
839
|
+
bool(data.get("trace")),
|
|
840
|
+
)
|
|
803
841
|
metrics = data.get("metrics", {}) or {}
|
|
804
842
|
metrics.setdefault("outcome_score", None)
|
|
805
843
|
metrics.setdefault("events_score", None)
|
|
806
844
|
metrics.setdefault("details", {})
|
|
807
845
|
data["metrics"] = metrics
|
|
846
|
+
|
|
847
|
+
if data.get("trace") is None:
|
|
848
|
+
legacy_trace = getattr(legacy_response, "trace", None)
|
|
849
|
+
if legacy_trace is not None:
|
|
850
|
+
data["trace"] = legacy_trace
|
|
851
|
+
else:
|
|
852
|
+
tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
|
|
853
|
+
if callable(tracer_factory):
|
|
854
|
+
tracer = tracer_factory()
|
|
855
|
+
logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
|
|
856
|
+
if isinstance(tracer, SessionTracer):
|
|
857
|
+
try:
|
|
858
|
+
await tracer.initialize()
|
|
859
|
+
if tracer.db is not None:
|
|
860
|
+
trace_row = await tracer.db.get_session_trace(request.run_id)
|
|
861
|
+
if trace_row is not None:
|
|
862
|
+
data["trace"] = trace_row
|
|
863
|
+
except Exception as exc:
|
|
864
|
+
logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
|
|
865
|
+
finally:
|
|
866
|
+
with suppress(Exception):
|
|
867
|
+
await tracer.close()
|
|
808
868
|
|
|
809
869
|
# Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
|
|
810
870
|
# Use fallback if somehow missing
|
|
@@ -820,12 +880,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
820
880
|
if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
|
|
821
881
|
existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
|
|
822
882
|
data["pipeline_metadata"] = existing_meta
|
|
823
|
-
|
|
883
|
+
|
|
824
884
|
# Add trace_correlation_id to each trajectory (required for RL training pipeline)
|
|
825
885
|
if "trajectories" in data:
|
|
886
|
+
normalized_trajs: list[dict[str, Any]] = []
|
|
826
887
|
for traj in data.get("trajectories", []):
|
|
827
|
-
if isinstance(traj,
|
|
828
|
-
|
|
888
|
+
if isinstance(traj, BaseModel):
|
|
889
|
+
traj_dict = traj.model_dump()
|
|
890
|
+
elif isinstance(traj, dict):
|
|
891
|
+
traj_dict = dict(traj)
|
|
892
|
+
else:
|
|
893
|
+
continue
|
|
894
|
+
traj_dict["trace_correlation_id"] = final_cid
|
|
895
|
+
if not traj_dict.get("inference_url"):
|
|
896
|
+
inferred_url = policy_cfg.get("inference_url")
|
|
897
|
+
if inferred_url:
|
|
898
|
+
traj_dict["inference_url"] = inferred_url
|
|
899
|
+
normalized_trajs.append(traj_dict)
|
|
900
|
+
if normalized_trajs:
|
|
901
|
+
data["trajectories"] = normalized_trajs
|
|
902
|
+
logger.info(
|
|
903
|
+
"ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
|
|
904
|
+
request.run_id,
|
|
905
|
+
normalized_trajs[0].get("inference_url") if normalized_trajs else None,
|
|
906
|
+
)
|
|
829
907
|
logger.info(
|
|
830
908
|
"ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
|
|
831
909
|
request.run_id,
|
|
@@ -844,6 +922,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
844
922
|
request.run_id,
|
|
845
923
|
existing_meta,
|
|
846
924
|
)
|
|
925
|
+
|
|
926
|
+
if data.get("trace") is None:
|
|
927
|
+
raise HTTPException(
|
|
928
|
+
status_code=500,
|
|
929
|
+
detail="trace_payload_missing: task app did not emit a SessionTrace",
|
|
930
|
+
)
|
|
847
931
|
|
|
848
932
|
# ASSERTION: Verify trace_correlation_id is present in response at all required levels
|
|
849
933
|
assert "trace_correlation_id" in data, (
|
|
@@ -962,6 +1046,7 @@ register_task_app(
|
|
|
962
1046
|
(str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
|
|
963
1047
|
),
|
|
964
1048
|
secret_names=("groq-api-key", "openai-api-key"),
|
|
1049
|
+
env_vars={"SERVICE": "MODAL"},
|
|
965
1050
|
memory=16384,
|
|
966
1051
|
cpu=4.0,
|
|
967
1052
|
max_containers=10,
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the colocated example at
|
|
4
4
|
`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling). Prefer using
|
|
6
|
-
`uvx synth-ai
|
|
6
|
+
`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -197,6 +197,8 @@ class CrafterPolicy(Policy):
|
|
|
197
197
|
if self.use_tools:
|
|
198
198
|
payload["tools"] = TOOLS_SCHEMA
|
|
199
199
|
payload["tool_choice"] = "required"
|
|
200
|
+
payload["function_call"] = {"name": "interact_many"}
|
|
201
|
+
payload["parallel_tool_calls"] = False
|
|
200
202
|
# Ensure the inference server injects family-specific stop sequences
|
|
201
203
|
# to terminate immediately after the first tool call for compliance.
|
|
202
204
|
payload["stop_after_tool_calls"] = 1
|
|
@@ -207,13 +209,7 @@ class CrafterPolicy(Policy):
|
|
|
207
209
|
response: dict[str, Any],
|
|
208
210
|
use_tools: bool = True,
|
|
209
211
|
) -> list[dict[str, Any]]:
|
|
210
|
-
"""Turn an inference response into environment tool calls.
|
|
211
|
-
|
|
212
|
-
- If tools were used, expect tool_calls-compatible output and forward as-is
|
|
213
|
-
in our simple JSON format: {"tool_name": str, "arguments": {...}}.
|
|
214
|
-
- If no tools, parse plain-text actions using CrafterReActAgent parser and
|
|
215
|
-
wrap them into a single interact_many tool call.
|
|
216
|
-
"""
|
|
212
|
+
"""Turn an inference response into environment tool calls."""
|
|
217
213
|
# First check if we got actual tool calls
|
|
218
214
|
choices = response.get("choices", [])
|
|
219
215
|
tool_calls: list[dict[str, Any]] = []
|
|
@@ -272,24 +268,6 @@ class CrafterPolicy(Policy):
|
|
|
272
268
|
normalized.append(tc)
|
|
273
269
|
return normalized
|
|
274
270
|
|
|
275
|
-
# Otherwise, parse plain text content for actions
|
|
276
|
-
text = ""
|
|
277
|
-
for choice in choices:
|
|
278
|
-
msg = choice.get("message", {})
|
|
279
|
-
content = msg.get("content", "")
|
|
280
|
-
if content:
|
|
281
|
-
text = content
|
|
282
|
-
break
|
|
283
|
-
|
|
284
|
-
if text:
|
|
285
|
-
# Try to parse actions from the text
|
|
286
|
-
from .shared import parse_actions
|
|
287
|
-
|
|
288
|
-
actions = parse_actions(text)
|
|
289
|
-
if actions:
|
|
290
|
-
# Wrap actions in interact_many tool call
|
|
291
|
-
return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
|
|
292
|
-
|
|
293
271
|
# No actions found
|
|
294
272
|
return []
|
|
295
273
|
|
|
@@ -542,7 +520,7 @@ class CrafterPolicy(Policy):
|
|
|
542
520
|
"claude-3", # All Claude 3 models support vision
|
|
543
521
|
"gemini", # Gemini models
|
|
544
522
|
"qwen-vl", # Qwen Vision-Language models
|
|
545
|
-
"
|
|
523
|
+
"qwen3-vl", # Qwen3 VL
|
|
546
524
|
"pixtral", # Mistral's vision model
|
|
547
525
|
"llava", # LLaVA models
|
|
548
526
|
"phi-3-vision", # Microsoft Phi-3 Vision
|
|
@@ -45,8 +45,7 @@ class CrafterReActAgent:
|
|
|
45
45
|
"Action policy:\n"
|
|
46
46
|
"- Always return a single tool call: interact_many({actions: [...]})\n"
|
|
47
47
|
"- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
|
|
48
|
-
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
|
|
49
|
-
"- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
|
|
48
|
+
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
|
|
50
49
|
"Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
|
|
51
50
|
"place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
|
|
52
51
|
"make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
|
+
import logging
|
|
4
5
|
import os
|
|
5
6
|
|
|
6
7
|
from fastapi import FastAPI
|
|
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
|
|
|
9
10
|
from pydantic import BaseModel
|
|
10
11
|
from starlette.requests import Request
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
_VERSION_LOGGED = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _resolve_task_app_version() -> str:
    """Determine the version string to report for this task app.

    Sources are consulted in order and the first non-blank string wins:

    1. the ``TASK_APP_VERSION`` environment variable,
    2. the installed distribution metadata for ``synth-ai``,
    3. the ``__version__`` attribute of the ``synth_ai`` package.

    Any error raised while probing sources 2 and 3 is ignored.

    Returns:
        The stripped version string, or ``"unknown"`` when no source yields
        one.
    """
    override = os.getenv("TASK_APP_VERSION")
    if isinstance(override, str):
        override = override.strip()
        if override:
            return override

    # Installed-package metadata; a missing distribution is not fatal.
    with contextlib.suppress(Exception):
        import importlib.metadata as importlib_metadata  # python 3.11 stdlib

        installed = importlib_metadata.version("synth-ai")
        if isinstance(installed, str) and installed.strip():
            return installed.strip()

    # Last resort: the package's own __version__ attribute, if importable.
    with contextlib.suppress(Exception):
        import synth_ai

        declared = getattr(synth_ai, "__version__", None)
        if isinstance(declared, str) and declared.strip():
            return declared.strip()

    return "unknown"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _log_task_app_version_once() -> None:
    """Log the task app version at INFO level the first time it is called.

    The module-level ``_VERSION_LOGGED`` flag records that the message has
    been emitted, so later calls return without logging. When the
    ``TASK_APP_BUILD_ID`` environment variable is set to a non-empty value it
    is appended to the message.
    """
    global _VERSION_LOGGED
    if not _VERSION_LOGGED:
        resolved = _resolve_task_app_version()
        build = os.getenv("TASK_APP_BUILD_ID")

        if not build:
            logger.info("TASK_APP_VERSION: %s", resolved)
        else:
            logger.info("TASK_APP_VERSION: %s (build=%s)", resolved, build)

        _VERSION_LOGGED = True
|
|
58
|
+
|
|
12
59
|
|
|
13
60
|
class TaskApp:
|
|
14
61
|
"""Holds service configuration and shared state."""
|
|
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
56
103
|
allow_headers=["*"],
|
|
57
104
|
)
|
|
58
105
|
|
|
106
|
+
_log_task_app_version_once()
|
|
107
|
+
|
|
59
108
|
# Initialize task app configuration
|
|
60
109
|
task_app = TaskApp()
|
|
61
110
|
app.state.task_app = task_app
|