synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of synth-ai has been flagged as a potentially problematic release.
Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -129,7 +129,7 @@ async def main():
         print("✓ Server is healthy")
     except Exception as e:
         print(f"❌ Server not responding: {e}")
-        print(f" Start it with: uv run -m synth_ai task-app serve pokemon_red --port 8913")
+        print(f" Start it with: uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913")
         return
 
     # Check API key
@@ -222,4 +222,3 @@ async def main():
 
 if __name__ == "__main__":
     asyncio.run(main())
-
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
 from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
     PalletTownProgressionCompositeReward,
 )
-from synth_ai.task.apps import TaskAppEntry, register_task_app
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import (
     RolloutMetrics,
     RolloutRequest,
@@ -260,8 +260,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         {
             "role": "system",
             "content": (
-                "You are controlling Pokémon Red. Respond with a single tool call named 'press_button' "
-                "with JSON arguments {button: 'A|B|UP|DOWN|LEFT|RIGHT|START|SELECT', frames: 1-120}."
+                "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
+                "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
+                "Choose appropriate button presses based on what you see in the game screen. "
+                "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
             ),
         },
         {
@@ -788,11 +790,40 @@ def build_config() -> TaskAppConfig:
 register_task_app(
     entry=TaskAppEntry(
         app_id="pokemon_red",
-        description="Pokémon Red demo task app",
+        description="Pokémon Red demo task app with vision support",
        config_factory=build_config,
         aliases=("pokemon_red_demo",),
         env_files=(),
-        modal=None,
+        modal=ModalDeploymentConfig(
+            app_name="pokemon-red-vision-task-app",
+            python_version="3.11",
+            pip_packages=(
+                "fastapi>=0.100.0",
+                "uvicorn>=0.23.0",
+                "pydantic>=2.0.0",
+                "numpy>=1.24.0",
+                "aiohttp>=3.8.0",
+                "httpx>=0.24.0",
+                "python-dotenv>=1.0.1",
+                # Tracing/DB runtime deps
+                "sqlalchemy>=2.0.42",
+                "aiosqlite>=0.21.0",
+                "greenlet>=3.2.3",
+                # Pokemon Red environment
+                "pyboy>=2.0.0",
+                "pillow>=9.0.0",
+            ),
+            extra_local_dirs=(
+                # Mount repo root so local modules resolve when deployed on Modal
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
+            ),
+            secret_names=("openai-api-key", "groq-api-key"),
+            memory=16384,
+            cpu=4.0,
+            max_containers=10,
+        ),
     )
 )
 
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
 cd /path/to/synth-ai
 
 # Start the Sokoban task app on port 8911
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
 kill -9 $(lsof -ti :8911)
 
 # Restart
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 ## Examples
@@ -304,4 +304,3 @@ To add new features:
 ## License
 
 MIT
-
@@ -1,24 +1,22 @@
 # Verilog Eval Config for Groq Qwen3-32B
-# Quick eval to test Verilog task app before RL training
-
-[task_app]
-# Update this with your Modal URL after deployment
-url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+# Quick eval to test the Verilog task app before RL training
 
 [eval]
-num_episodes = 3  # Quick test with 3 seeds
+app_id = "grpo-verilog"
+task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+model = "groq:qwen3-32b"
 seeds = [0, 1, 2]
-max_steps = 15  # More steps for Verilog compilation chains
+max_turns = 15
+concurrency = 1
+return_trace = true
+trace_format = "structured"
+
+[eval.env_config]
+difficulty = "medium"
 
-[policy]
+[eval.policy_config]
 provider = "groq"
 model = "qwen/qwen3-32b"
 temperature = 0.2
 max_tokens = 768
 inference_url = "https://api.groq.com/openai/v1/chat/completions"
-
-[env]
-difficulty = "medium"  # Can be "easy", "medium", or "hard"
-
-
-
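For readers who want the end state rather than the hunk, the Verilog eval settings now live entirely under `[eval]`, with environment and policy overrides in nested tables. A sketch of the resulting file, assembled only from the added lines above:

```toml
[eval]
app_id = "grpo-verilog"
task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
model = "groq:qwen3-32b"
seeds = [0, 1, 2]
max_turns = 15
concurrency = 1
return_trace = true
trace_format = "structured"

[eval.env_config]
difficulty = "medium"

[eval.policy_config]
provider = "groq"
model = "qwen/qwen3-32b"
temperature = 0.2
max_tokens = 768
inference_url = "https://api.groq.com/openai/v1/chat/completions"
```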
@@ -1,7 +1,7 @@
 """Compatibility wrapper for the GRPO Verilog task app.
 
 This mirrors the Crafter task app wrapper while delegating configuration to
-`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai serve grpo-verilog`,
+`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
 but the module remains for direct execution or importing the FastAPI app.
 """
 
@@ -1,4 +1,7 @@
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 
 [job]
 model = "openai/gpt-4o-mini-2024-07-18"
@@ -1,7 +1,10 @@
 # Crafter Full Finetune (FFT) example on H100
 # Adjust paths and hyperparameters to your environment before running.
 
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 
 [job]
 model = "Qwen/Qwen3-4B"  # base model to finetune
@@ -1,7 +1,5 @@
 # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
 
-type = "sft"
-
 [algorithm]
 type = "offline"
 method = "supervised_finetune"
@@ -1,7 +1,5 @@
 # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
 
-type = "rl"
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
@@ -40,6 +38,7 @@ health_interval_ms = 300
 [model]
 # Base model start
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
 label = "crafter-rl-from-base"
 
 [rollout]
@@ -62,6 +61,7 @@ seeds = [
 [training]
 num_epochs = 1
 iterations_per_epoch = 10
+max_turns = 10
 batch_size = 16
 group_size = 4
 gradient_accumulation_steps = 1
@@ -448,7 +448,7 @@ async def main() -> None:
 
         print(f"Ops executed: {ops}")
         print(
-            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai serve …` to persist traces/SFT."
+            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
         )
     except httpx.HTTPStatusError as exc:
         detail = (
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 
 ## Local development
 ```bash
-uvx synth-ai serve grpo-crafter --port 8001
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 # --env-file path/to/.env  # load additional environment variables
 # --reload                 # enable uvicorn auto-reload
@@ -8,11 +8,17 @@ import sys
 from collections.abc import Iterable, Sequence
 from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from fastapi import HTTPException
+from pydantic import BaseModel
+
+from pydantic import BaseModel
+
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
-from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
+from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
 from synth_ai.task.json import to_jsonable  # noqa: F401 (imported for side-effect compatibility)
 from synth_ai.task.rubrics import load_rubric
@@ -115,6 +121,18 @@ try:
 except Exception:
     pass
 
+try:
+    from .synth_envs_hosted.utils import (
+        ensure_chat_completions_url,
+        extract_trace_correlation_id,
+    )
+except Exception:  # pragma: no cover - fallback when optional deps missing
+    def ensure_chat_completions_url(raw_url, mode=None):
+        return raw_url
+
+    def extract_trace_correlation_id(_raw_url):
+        return None
+
 HAS_HOSTED = True
 try:
     import crafter  # type: ignore
@@ -306,7 +324,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
 def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
-        environments=["crafter"],
+        environment="crafter",
         action_space={
             "type": "discrete",
             "size": len(crafter_constants.actions),
@@ -402,7 +420,7 @@ def provide_task_instances(
         infos.append(
             TaskInfo(
                 task=base_info.task,
-                environments=base_info.environments,
+                environment=base_info.environment,
                 action_space=base_info.action_space,
                 observation={
                     **base_info.observation,
@@ -536,7 +554,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
 
     request = _coerce_math_to_crafter(request)
 
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
+
     policy_cfg = dict(request.policy.config or {})
+    logger.info(
+        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
+        sorted(policy_cfg.keys()),
+        policy_cfg.get("inference_url"),
+        request.run_id,
+        request.mode,
+    )
+    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
+    if isinstance(inferred_url, str) and inferred_url:
+        policy_cfg["inference_url"] = inferred_url
+    else:
+        logger.warning(
+            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
+            request.run_id,
+            policy_cfg.get("inference_url"),
+        )
+
+    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
+    if request.mode == RolloutMode.RL:
+        assert trace_correlation_id, (
+            f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
+            f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
+        )
+    if trace_correlation_id:
+        policy_cfg["trace_correlation_id"] = trace_correlation_id
+
+    pipeline_metadata: dict[str, Any] = {}
+    if trace_correlation_id:
+        pipeline_metadata["trace_correlation_id"] = trace_correlation_id
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
+
     try:
         max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
     except Exception:
@@ -585,17 +643,90 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
         safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
         training_session_id=request.training_session_id,
         synth_base_url=request.synth_base_url,
+        mode=request.mode,
     )
 
     legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
         legacy_request, fastapi_request
     )
     data = legacy_response.model_dump()
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug(
+                    "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
+                )
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
+
+    final_cid = trace_correlation_id or f"trace_{request.run_id}"
+    data["trace_correlation_id"] = final_cid
+
+    existing_meta = data.get("pipeline_metadata")
+    if not isinstance(existing_meta, dict):
+        existing_meta = {}
+    existing_meta.setdefault("trace_correlation_id", final_cid)
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
+    data["pipeline_metadata"] = existing_meta
+
+    # Propagate inference_url into each legacy trajectory entry for downstream tooling.
+    inferred_url = policy_cfg.get("inference_url")
+
+    if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
+        for traj in data.get("trajectories", []):
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict.setdefault("trace_correlation_id", final_cid)
+            if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
+                traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+
+    if data.get("trace") is None:
+        data["trace"] = {
+            "session_id": request.run_id,
+            "created_at": datetime.now(UTC).isoformat(),
+            "metadata": dict(existing_meta),
+            "event_history": [],
+            "markov_blanket_message_history": [],
+        }
+        raise HTTPException(
+            status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
+        )
+
     return RolloutResponse.model_validate(data)
 
 
@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai serve grpo-crafter` for local development and testing.
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
-            # Ensure the inference server injects family-specific stop sequences
-            # to terminate immediately after the first tool call for compliance.
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             payload["stop_after_tool_calls"] = 1
         return payload
 
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
                 normalized.append(tc)
             return normalized
 
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
-
         # No actions found
         return []
 
@@ -46,7 +46,7 @@ class CrafterReActAgent:
         "- Always return a single tool call: interact_many({actions: [...]})\n"
         "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
         "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-        "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+        "\n"
         "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
         "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
         "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -156,13 +156,13 @@ class OpenAIClient:
         keys_preview = sorted(processed_request.keys())
         logger.info(f"Request keys: {keys_preview}")
 
-        # Final hard-guard for OpenAI: ensure unsupported field is not present
+        # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
         try:
-            if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
+            low_url = url.lower()
+            if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                 processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for OpenAI request")
+                logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-            low_url = url.lower()
             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                 processed_request, dict
             ):
@@ -692,9 +692,10 @@ async def step_policy(
         "sokoban-react",
         "crafter-react",
     ) and getattr(policy, "use_tools", True):
-        req_tools = meta["inference_request"]["tools"]
-        req_tool_choice = meta["inference_request"]["tool_choice"]
-        req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
+        inf_req = meta.get("inference_request", {})
+        req_tools = inf_req.get("tools")
+        req_tool_choice = inf_req.get("tool_choice")
+        req_stop_after = inf_req.get("stop_after_tool_calls")
         logger.info(
             f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
         )
@@ -703,6 +704,8 @@
                 status_code=500,
                 detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
             )
+        if req_stop_after is None:
+            inf_req["stop_after_tool_calls"] = 1
 
         # Call inference service with retries for Flash cold-start (503)
         import time as _t
@@ -1,8 +1,15 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
 [services]
 task_url = "https://your-math-task.modal.run"
 
 [model]
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
 
 [policy]
 model = "Qwen/Qwen3-4B"
@@ -18,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 256
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 128
@@ -31,5 +40,23 @@ learning_rate = 5e-6
 gpu_type = "A10G"
 gpu_count = 4
 
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
+
 [tags]
 experiment = "math_single_step"
@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
 
 [model]
 base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
 
 [policy]
 model = "Qwen/Qwen3-1.7B"
@@ -23,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 50
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 2
@@ -61,6 +65,7 @@ health_max_wait_s = 180
 health_interval_ms = 300
 
 [rollout]
+env_name = "math"
 policy_name = "math-single-step"
 max_turns = 1
 episodes_per_batch = 32  # group_size * batch_size
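The same additions recur across the RL example configs in this release: an `[algorithm]` table instead of a bare top-level `type = "rl"`, a `trainer_mode` (and usually a `label`) under `[model]`, epoch and iteration counts under `[training]`, and an explicit `env_name` under `[rollout]`. A consolidated sketch assembled from the hunks above; the values shown are the ones from these examples, not universal defaults:

```toml
[algorithm]
type = "online"
method = "policy_gradient"
variety = "gspo"

[model]
base = "Qwen/Qwen3-1.7B"
trainer_mode = "full"
label = "math-single-step-qwen3-1.7b"

[training]
num_epochs = 1
iterations_per_epoch = 20

[rollout]
env_name = "math"
policy_name = "math-single-step"
```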
@@ -74,8 +74,14 @@ def build_rl_payload(
     idempotency: str | None,
     allow_experimental: bool | None = None,
 ) -> RLBuildResult:
+    # Load and validate config with SDK-level checks
+    from synth_ai.api.train.utils import load_toml
+    from synth_ai.cli.commands.train.validation import validate_rl_config
+
     try:
-        rl_cfg = RLConfig.from_path(config_path)
+        raw_config = load_toml(config_path)
+        validated_config = validate_rl_config(raw_config)  # Adds defaults & validates
+        rl_cfg = RLConfig.from_mapping(validated_config)
     except ValidationError as exc:
         raise click.ClickException(_format_validation_error(config_path, exc)) from exc
 
@@ -110,8 +116,8 @@ def build_rl_payload(
             "Task app URL required (provide --task-url or set services.task_url in TOML)"
         )
 
-    model_source = (model_cfg.source or "").strip()
-    model_base = (model_cfg.base or "").strip()
+    model_source = (model_cfg.source or "").strip() if model_cfg else ""
+    model_base = (model_cfg.base or "").strip() if model_cfg else ""
     override_model = (overrides.get("model") or "").strip()
     if override_model:
         model_source = override_model