synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synth-ai might be problematic.
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +5 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +125 -10
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +12 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +58 -1487
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -11
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/validators.py +2 -2
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/utils/env.py +25 -18
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/modal.py +2 -2
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py
CHANGED
@@ -129,7 +129,7 @@ async def main():
             print("✓ Server is healthy")
         except Exception as e:
             print(f"❌ Server not responding: {e}")
-            print(f" Start it with: uv run -m synth_ai task-app
+            print(f" Start it with: uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913")
             return
 
     # Check API key
@@ -222,4 +222,3 @@ async def main():
 
 if __name__ == "__main__":
     asyncio.run(main())
-
examples/task_apps/pokemon_red/task_app.py
CHANGED
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
 from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
     PalletTownProgressionCompositeReward,
 )
-from synth_ai.task.apps import TaskAppEntry, register_task_app
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import (
     RolloutMetrics,
     RolloutRequest,
@@ -260,8 +260,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             {
                 "role": "system",
                 "content": (
-                    "You are controlling Pokémon Red.
-                    "
+                    "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
+                    "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
+                    "Choose appropriate button presses based on what you see in the game screen. "
+                    "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
                 ),
             },
             {
@@ -788,11 +790,40 @@ def build_config() -> TaskAppConfig:
 register_task_app(
     entry=TaskAppEntry(
         app_id="pokemon_red",
-        description="Pokémon Red demo task app",
+        description="Pokémon Red demo task app with vision support",
         config_factory=build_config,
         aliases=("pokemon_red_demo",),
         env_files=(),
-        modal=
+        modal=ModalDeploymentConfig(
+            app_name="pokemon-red-vision-task-app",
+            python_version="3.11",
+            pip_packages=(
+                "fastapi>=0.100.0",
+                "uvicorn>=0.23.0",
+                "pydantic>=2.0.0",
+                "numpy>=1.24.0",
+                "aiohttp>=3.8.0",
+                "httpx>=0.24.0",
+                "python-dotenv>=1.0.1",
+                # Tracing/DB runtime deps
+                "sqlalchemy>=2.0.42",
+                "aiosqlite>=0.21.0",
+                "greenlet>=3.2.3",
+                # Pokemon Red environment
+                "pyboy>=2.0.0",
+                "pillow>=9.0.0",
+            ),
+            extra_local_dirs=(
+                # Mount repo root so local modules resolve when deployed on Modal
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
+            ),
+            secret_names=("openai-api-key", "groq-api-key"),
+            memory=16384,
+            cpu=4.0,
+            max_containers=10,
+        ),
    )
 )
 
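Note that the `extra_local_dirs` entries above hard-code absolute paths from the author's machine. A minimal sketch of how the same registration could derive the mounts from the repository root instead; the `ModalDeploymentConfig` field names come from the diff above, while `REPO_ROOT` and the reduced package list are assumptions for illustration only:

```python
# Hypothetical sketch: derive Modal mounts from the repo root rather than
# hard-coding absolute paths. Field names mirror the diff above; REPO_ROOT
# and the trimmed pip_packages tuple are assumptions, not package code.
from pathlib import Path

from synth_ai.task.apps import ModalDeploymentConfig

# Assumed layout: this file lives at examples/task_apps/pokemon_red/task_app.py
REPO_ROOT = Path(__file__).resolve().parents[3]

modal_cfg = ModalDeploymentConfig(
    app_name="pokemon-red-vision-task-app",
    python_version="3.11",
    pip_packages=("fastapi>=0.100.0", "uvicorn>=0.23.0", "pyboy>=2.0.0", "pillow>=9.0.0"),
    extra_local_dirs=(
        (str(REPO_ROOT), "/opt/synth_ai_repo"),
        (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
        (str(REPO_ROOT / "examples/task_apps/pokemon_red"), "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
    ),
    secret_names=("openai-api-key", "groq-api-key"),
    memory=16384,
    cpu=4.0,
    max_containers=10,
)
```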
examples/task_apps/sokoban/README.md
CHANGED
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
 cd /path/to/synth-ai
 
 # Start the Sokoban task app on port 8911
-uvx synth-ai task-app
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
 kill -9 $(lsof -ti :8911)
 
 # Restart
-uvx synth-ai task-app
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 ## Examples
@@ -304,4 +304,3 @@ To add new features:
 ## License
 
 MIT
-
examples/task_apps/verilog/eval_groq_qwen32b.toml
CHANGED
@@ -1,24 +1,22 @@
 # Verilog Eval Config for Groq Qwen3-32B
-# Quick eval to test Verilog task app before RL training
-
-[task_app]
-# Update this with your Modal URL after deployment
-url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+# Quick eval to test the Verilog task app before RL training
 
 [eval]
-
+app_id = "grpo-verilog"
+task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+model = "groq:qwen3-32b"
 seeds = [0, 1, 2]
-
+max_turns = 15
+concurrency = 1
+return_trace = true
+trace_format = "structured"
+
+[eval.env_config]
+difficulty = "medium"
 
-[
+[eval.policy_config]
 provider = "groq"
 model = "qwen/qwen3-32b"
 temperature = 0.2
 max_tokens = 768
 inference_url = "https://api.groq.com/openai/v1/chat/completions"
-
-[env]
-difficulty = "medium" # Can be "easy", "medium", or "hard"
-
-
-
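The eval settings move from top-level `[task_app]`/`[env]` tables into a single `[eval]` table with nested `[eval.env_config]` and `[eval.policy_config]`. A minimal sketch of reading the reshaped layout with the standard library; the keys come from the TOML above, and the reading code itself is illustrative, not part of synth-ai:

```python
# Illustrative only: read the reshaped eval config shown in the diff above.
# Key names come from that TOML; nothing here is a synth-ai API.
import tomllib

with open("examples/task_apps/verilog/eval_groq_qwen32b.toml", "rb") as fh:
    cfg = tomllib.load(fh)

eval_cfg = cfg["eval"]
print(eval_cfg["task_app_url"])                   # task app endpoint
print(eval_cfg["seeds"])                          # [0, 1, 2]
print(eval_cfg["env_config"]["difficulty"])       # "medium"
print(eval_cfg["policy_config"]["model"])         # "qwen/qwen3-32b"
```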
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py
CHANGED
@@ -1,7 +1,7 @@
 """Compatibility wrapper for the GRPO Verilog task app.
 
 This mirrors the Crafter task app wrapper while delegating configuration to
-`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai
+`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
 but the module remains for direct execution or importing the FastAPI app.
 """
 
examples/warming_up_to_rl/configs/crafter_fft.toml
CHANGED
@@ -1,7 +1,10 @@
 # Crafter Full Finetune (FFT) example on H100
 # Adjust paths and hyperparameters to your environment before running.
 
-
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 
 [job]
 model = "Qwen/Qwen3-4B" # base model to finetune
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml
CHANGED
@@ -1,7 +1,5 @@
 # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
 
-type = "rl"
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
@@ -40,6 +38,7 @@ health_interval_ms = 300
 [model]
 # Base model start
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
 label = "crafter-rl-from-base"
 
 [rollout]
@@ -62,6 +61,7 @@ seeds = [
 [training]
 num_epochs = 1
 iterations_per_epoch = 10
+max_turns = 10
 batch_size = 16
 group_size = 4
 gradient_accumulation_steps = 1
examples/warming_up_to_rl/run_local_rollout_traced.py
CHANGED
@@ -448,7 +448,7 @@ async def main() -> None:
 
         print(f"Ops executed: {ops}")
         print(
-            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai
+            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
         )
     except httpx.HTTPStatusError as exc:
         detail = (
examples/warming_up_to_rl/task_app/README.md
CHANGED
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 
 ## Local development
 ```bash
-uvx synth-ai
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 # --env-file path/to/.env # load additional environment variables
 # --reload # enable uvicorn auto-reload
examples/warming_up_to_rl/task_app/grpo_crafter.py
CHANGED
@@ -8,11 +8,17 @@ import sys
 from collections.abc import Iterable, Sequence
 from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from fastapi import HTTPException
+from pydantic import BaseModel
+
+from pydantic import BaseModel
+
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
-from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
+from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
 from synth_ai.task.json import to_jsonable  # noqa: F401 (imported for side-effect compatibility)
 from synth_ai.task.rubrics import load_rubric
@@ -115,6 +121,18 @@ try:
 except Exception:
     pass
 
+try:
+    from .synth_envs_hosted.utils import (
+        ensure_chat_completions_url,
+        extract_trace_correlation_id,
+    )
+except Exception:  # pragma: no cover - fallback when optional deps missing
+    def ensure_chat_completions_url(raw_url, mode=None):
+        return raw_url
+
+    def extract_trace_correlation_id(_raw_url):
+        return None
+
 HAS_HOSTED = True
 try:
     import crafter  # type: ignore
@@ -306,7 +324,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
 def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
-
+        environment="crafter",
         action_space={
             "type": "discrete",
             "size": len(crafter_constants.actions),
@@ -402,7 +420,7 @@ def provide_task_instances(
         infos.append(
             TaskInfo(
                 task=base_info.task,
-
+                environment=base_info.environment,
                 action_space=base_info.action_space,
                 observation={
                     **base_info.observation,
@@ -536,7 +554,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
 
     request = _coerce_math_to_crafter(request)
 
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
+
     policy_cfg = dict(request.policy.config or {})
+    logger.info(
+        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
+        sorted(policy_cfg.keys()),
+        policy_cfg.get("inference_url"),
+        request.run_id,
+        request.mode,
+    )
+    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
+    if isinstance(inferred_url, str) and inferred_url:
+        policy_cfg["inference_url"] = inferred_url
+    else:
+        logger.warning(
+            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
+            request.run_id,
+            policy_cfg.get("inference_url"),
+        )
+
+    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
+    if request.mode == RolloutMode.RL:
+        assert trace_correlation_id, (
+            f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
+            f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
+        )
+    if trace_correlation_id:
+        policy_cfg["trace_correlation_id"] = trace_correlation_id
+
+    pipeline_metadata: dict[str, Any] = {}
+    if trace_correlation_id:
+        pipeline_metadata["trace_correlation_id"] = trace_correlation_id
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
+
     try:
         max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
     except Exception:
@@ -585,17 +643,90 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
         safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
         training_session_id=request.training_session_id,
         synth_base_url=request.synth_base_url,
+        mode=request.mode,
     )
 
     legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
         legacy_request, fastapi_request
     )
     data = legacy_response.model_dump()
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug(
+                    "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
+                )
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
+
+    final_cid = trace_correlation_id or f"trace_{request.run_id}"
+    data["trace_correlation_id"] = final_cid
+
+    existing_meta = data.get("pipeline_metadata")
+    if not isinstance(existing_meta, dict):
+        existing_meta = {}
+    existing_meta.setdefault("trace_correlation_id", final_cid)
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
+    data["pipeline_metadata"] = existing_meta
+
+    # Propagate inference_url into each legacy trajectory entry for downstream tooling.
+    inferred_url = policy_cfg.get("inference_url")
+
+    if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
+        for traj in data.get("trajectories", []):
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict.setdefault("trace_correlation_id", final_cid)
+            if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
+                traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+
+    if data.get("trace") is None:
+        data["trace"] = {
+            "session_id": request.run_id,
+            "created_at": datetime.now(UTC).isoformat(),
+            "metadata": dict(existing_meta),
+            "event_history": [],
+            "markov_blanket_message_history": [],
+        }
+        raise HTTPException(
+            status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
+        )
+
     return RolloutResponse.model_validate(data)
 
 
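For context on the trace plumbing added above: when no correlation id can be extracted from the inference URL, the executor falls back to `trace_{run_id}`. A minimal sketch of that naming convention in isolation, assuming the fallback stub shown earlier in this diff; the helper function name is hypothetical:

```python
# Minimal sketch of the fallback correlation-id naming used above.
# resolve_trace_correlation_id is hypothetical; the stub mirrors the diff.
def resolve_trace_correlation_id(inference_url: str | None, run_id: str) -> str:
    def extract_trace_correlation_id(_raw_url):  # fallback stub from the diff
        return None

    return extract_trace_correlation_id(inference_url) or f"trace_{run_id}"

assert resolve_trace_correlation_id(None, "run-123") == "trace_run-123"
```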
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py
CHANGED
@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py
CHANGED
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
-
-
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             payload["stop_after_tool_calls"] = 1
         return payload
 
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
             normalized.append(tc)
         return normalized
 
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
-
         # No actions found
         return []
 
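With the change above, a tools-enabled Crafter policy request carries the OpenAI-style `tool_choice`, a legacy `function_call` pin, and the vendor-specific `stop_after_tool_calls` flag. A rough sketch of the resulting payload shape; the key names come from the diff, while the model and messages values are placeholders:

```python
# Rough shape of the inference payload after CrafterPolicy adds tool fields.
# Key names come from the diff above; model/messages are placeholder values.
payload = {
    "model": "<policy model>",
    "messages": [{"role": "user", "content": "<observation>"}],
    "tools": "<TOOLS_SCHEMA from the task app>",
    "tool_choice": "required",
    "function_call": {"name": "interact_many"},
    "parallel_tool_calls": False,
    "stop_after_tool_calls": 1,  # stripped again before reaching OpenAI/Groq (see openai_client.py below)
}
```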
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py
CHANGED
@@ -46,7 +46,7 @@ class CrafterReActAgent:
             "- Always return a single tool call: interact_many({actions: [...]})\n"
             "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
             "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-            "
+            "\n"
             "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py
CHANGED
@@ -156,13 +156,13 @@ class OpenAIClient:
             keys_preview = sorted(processed_request.keys())
             logger.info(f"Request keys: {keys_preview}")
 
-            # Final hard-guard for OpenAI: ensure unsupported field is not present
+            # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
             try:
-
+                low_url = url.lower()
+                if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                     processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for OpenAI request")
+                    logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
                 # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-                low_url = url.lower()
                 if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                     processed_request, dict
                 ):
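The hard-guard above keys off the target URL. A standalone sketch of the same check, useful for exercising the URL matching in isolation; the substring tests come from the diff, while the function itself is illustrative and not part of the package:

```python
# Illustrative helper mirroring the guard in openai_client.py above.
# The substring checks come from the diff; the function name is hypothetical.
def strip_unsupported_fields(url: str, request: dict) -> dict:
    low_url = url.lower()
    is_openai_or_groq = "openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url
    if is_openai_or_groq and "stop_after_tool_calls" in request:
        request = dict(request)  # avoid mutating the caller's payload
        request.pop("stop_after_tool_calls", None)
    return request

cleaned = strip_unsupported_fields(
    "https://api.groq.com/openai/v1/chat/completions",
    {"model": "qwen/qwen3-32b", "stop_after_tool_calls": 1},
)
assert "stop_after_tool_calls" not in cleaned
```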
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py
CHANGED
@@ -692,9 +692,10 @@ async def step_policy(
         "sokoban-react",
         "crafter-react",
     ) and getattr(policy, "use_tools", True):
-
-
-
+        inf_req = meta.get("inference_request", {})
+        req_tools = inf_req.get("tools")
+        req_tool_choice = inf_req.get("tool_choice")
+        req_stop_after = inf_req.get("stop_after_tool_calls")
         logger.info(
             f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
         )
@@ -703,6 +704,8 @@ async def step_policy(
                 status_code=500,
                 detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
             )
+        if req_stop_after is None:
+            inf_req["stop_after_tool_calls"] = 1
 
         # Call inference service with retries for Flash cold-start (503)
         import time as _t
examples/workflows/math_rl/configs/rl_from_base_qwen.toml
CHANGED
@@ -1,8 +1,15 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
 [services]
 task_url = "https://your-math-task.modal.run"
 
 [model]
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
 
 [policy]
 model = "Qwen/Qwen3-4B"
@@ -18,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 256
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 128
@@ -31,5 +40,23 @@ learning_rate = 5e-6
 gpu_type = "A10G"
 gpu_count = 4
 
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
+
 [tags]
 experiment = "math_single_step"
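The new `[topology]` table splits the node's GPUs between vLLM inference, training, and an optional reference model. A quick sanity check one might run on such a config; the invariant that the split adds up to `gpu_count = 4` shown above is an assumption, not a documented synth-ai rule:

```python
# Illustrative sanity check for the GPU split in the [topology] table above.
# The invariant (vllm + training + ref == gpu_count) is an assumption.
import tomllib

with open("examples/workflows/math_rl/configs/rl_from_base_qwen.toml", "rb") as fh:
    cfg = tomllib.load(fh)

topo = cfg["topology"]
total = topo["gpus_for_vllm"] + topo["gpus_for_training"] + topo["gpus_for_ref"]
assert total == 4  # matches gpu_count = 4 in this config: 2 + 2 + 0
```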
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml
CHANGED
@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
 
 [model]
 base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
 
 [policy]
 model = "Qwen/Qwen3-1.7B"
@@ -23,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 50
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 2
@@ -61,6 +65,7 @@ health_max_wait_s = 180
 health_interval_ms = 300
 
 [rollout]
+env_name = "math"
 policy_name = "math-single-step"
 max_turns = 1
 episodes_per_batch = 32 # group_size * batch_size
synth_ai/api/train/builders.py
CHANGED
@@ -74,8 +74,14 @@ def build_rl_payload(
     idempotency: str | None,
     allow_experimental: bool | None = None,
 ) -> RLBuildResult:
+    # Load and validate config with SDK-level checks
+    from synth_ai.api.train.utils import load_toml
+    from synth_ai.cli.commands.train.validation import validate_rl_config
+
     try:
-
+        raw_config = load_toml(config_path)
+        validated_config = validate_rl_config(raw_config)  # Adds defaults & validates
+        rl_cfg = RLConfig.from_mapping(validated_config)
     except ValidationError as exc:
         raise click.ClickException(_format_validation_error(config_path, exc)) from exc
 
@@ -110,8 +116,8 @@ def build_rl_payload(
             "Task app URL required (provide --task-url or set services.task_url in TOML)"
         )
 
-    model_source = (model_cfg.source or "").strip()
-    model_base = (model_cfg.base or "").strip()
+    model_source = (model_cfg.source or "").strip() if model_cfg else ""
+    model_base = (model_cfg.base or "").strip() if model_cfg else ""
     override_model = (overrides.get("model") or "").strip()
     if override_model:
         model_source = override_model