PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl - Mend

synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (192) hide show

examples/analyze_semantic_words.sh +2 -2
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +5 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -4
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +36 -5
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
synth_ai/api/train/builders.py +9 -3
synth_ai/api/train/cli.py +125 -10
synth_ai/api/train/configs/__init__.py +8 -1
synth_ai/api/train/configs/rl.py +32 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/auth/credentials.py +119 -0
synth_ai/cli/__init__.py +12 -4
synth_ai/cli/commands/__init__.py +17 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/deploy/__init__.py +23 -0
synth_ai/cli/commands/deploy/core.py +614 -0
synth_ai/cli/commands/deploy/errors.py +72 -0
synth_ai/cli/commands/deploy/validation.py +11 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1109 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +388 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +73 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +199 -0
synth_ai/cli/commands/train/judge_validation.py +304 -0
synth_ai/cli/commands/train/validation.py +443 -0
synth_ai/cli/demo.py +2 -162
synth_ai/cli/deploy/__init__.py +28 -0
synth_ai/cli/deploy/core.py +5 -0
synth_ai/cli/deploy/errors.py +23 -0
synth_ai/cli/deploy/validation.py +5 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +58 -1487
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -11
synth_ai/learning/rl/client.py +3 -1
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +469 -0
synth_ai/streaming/streamer.py +301 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/validators.py +2 -2
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/utils/env.py +25 -18
synth_ai/utils/http.py +4 -1
synth_ai/utils/modal.py +2 -2
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0

examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml CHANGED Viewed

@@ -1,43 +1,26 @@
 # Evaluation config for gpt-4o-mini with vision
-# Stronger teacher than gpt-5-nano, use for high-quality distillation
+# Higher-quality teacher for Crafter SFT distillation
 [eval]
-model = "gpt-4o-mini-2024-07-18"
-provider = "openai"  # Use OpenAI API
-# Task app endpoint
+app_id = "grpo-crafter-task-app"
 task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
-# Vision settings (auto-detected from "gpt-4o" in model name)
-use_vision = true
-image_only_mode = false  # Include both text + images
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "200-299"  # Different seeds for comparison
-# Sampling parameters
-temperature = 0.6  # Lower temperature for more consistent behavior
+model = "gpt-4o-mini-2024-07-18"
+seeds = "200-299"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+[eval.policy_config]
+provider = "openai"
+model = "gpt-4o-mini-2024-07-18"
+temperature = 0.6
 max_tokens = 512
-# Trace collection
-collect_traces = true
-trace_db = "traces/gpt4o_mini_vision/rollouts.db"
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-# Parallel rollouts
-parallel_episodes = 5
-[task]
-name = "crafter"
-environment = "crafter-classic"
-# Task-specific settings
-[task.config]
-seed_start = 200
-max_episode_length = 256
-render_size = [64, 64]  # 64x64 PNG images

examples/qwen_vl/configs/eval_gpt5nano_vision.toml CHANGED Viewed

@@ -1,45 +1,26 @@
-# Evaluation config for gpt-4o-mini with vision
-# Collects vision traces for SFT training
-# Note: gpt-5-nano doesn't support tool calling yet, use gpt-4o-mini instead
+# Evaluation config for gpt-4o-mini (vision)
+# Collects traces for SFT training; legacy gpt-5-nano naming kept for convenience
 [eval]
-model = "gpt-4o-mini-2024-07-18"  # Changed from gpt-5-nano (no tool support)
-provider = "openai"  # Use OpenAI API
-# Task app endpoint (local or hosted)
-# task_app_url = "http://localhost:8000"  # Local
-task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"  # Hosted
-# Vision settings (auto-detected from "gpt-5" in model name)
-use_vision = true
-image_only_mode = false  # Include both text + images
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "0-99"  # Seeds 0 through 99
-# Sampling parameters
+app_id = "grpo-crafter-task-app"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
+model = "gpt-4o-mini-2024-07-18"
+seeds = "0-99"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+[eval.policy_config]
+provider = "openai"
+model = "gpt-4o-mini-2024-07-18"
 temperature = 0.7
 max_tokens = 512
-# Trace collection
-collect_traces = true
-trace_db = "traces/gpt5nano_vision/rollouts.db"
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-# Parallel rollouts (speeds up collection)
-parallel_episodes = 5  # Run 5 episodes in parallel
-[task]
-name = "crafter"
-environment = "crafter-classic"
-# Task-specific settings
-[task.config]
-seed_start = 0
-max_episode_length = 256
-render_size = [64, 64]  # 64x64 PNG images

examples/qwen_vl/configs/eval_qwen3vl_vision.toml ADDED Viewed

@@ -0,0 +1,26 @@
+# Evaluation config for Qwen3-VL vision rollouts
+# Collects traces for SFT training via synth-ai hosted inference
+[eval]
+app_id = "grpo-crafter-task-app"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
+model = "Qwen/Qwen3-VL-8B-Instruct"
+seeds = "100-199"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-VL-8B-Instruct"
+temperature = 0.7
+max_tokens = 512
+use_vision = true
+image_only_mode = false
+use_tools = true

examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} RENAMED Viewed

@@ -1,9 +1,9 @@
-# Filter Qwen2-VL vision traces for SFT training
-# Same settings as gpt5nano filter but for Qwen2-VL traces
+# Filter Qwen3-VL vision traces for SFT training
+# Mirrors the GPT-4o mini filter configuration for vision data
 [filter]
-input_db = "traces/qwen2vl_vision/rollouts.db"
-output_dir = "traces/qwen2vl_vision/sft"
+input_db = "traces/qwen3vl_vision/rollouts.db"
+output_dir = "traces/qwen3vl_vision/sft"
 # Quality filters
 min_steps_per_episode = 5
@@ -47,4 +47,3 @@ val_file = "val.jsonl"
 save_stats = true
 stats_file = "filter_stats.json"
 save_filtered_episode_ids = true

examples/qwen_vl/configs/filter_vision_sft.toml CHANGED Viewed

@@ -2,8 +2,8 @@
 # Applies quality filters and exports to SFT JSONL format
 [filter]
-input_db = "traces/gpt4o_vision_test/rollouts.db"
-output_dir = "traces/gpt4o_vision_test/sft"
+input_db = "traces/gpt4omini_vision/rollouts.db"
+output_dir = "traces/gpt4omini_vision/sft"
 # Quality filters
 min_steps_per_episode = 5        # Remove very short episodes
@@ -50,4 +50,3 @@ val_file = "val.jsonl"
 save_stats = true
 stats_file = "filter_stats.json"
 save_filtered_episode_ids = true

examples/qwen_vl/crafter_qwen_vl_agent.py CHANGED Viewed

@@ -2,7 +2,7 @@
 """
 Crafter agent using Qwen-VL models via synth-ai's hosted inference.
-This demonstrates vision-language models (Qwen2-VL, Qwen3-VL) playing Crafter
+This demonstrates vision-language models (Qwen3-VL family) playing Crafter
 with image observations. The CrafterPolicy automatically detects vision capability
 from the model name and includes base64-encoded PNG frames in the prompt.
@@ -12,7 +12,7 @@ Requirements:
 Usage:
   uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-      --model Qwen/Qwen2-VL-7B-Instruct --seeds 10 --steps 20
+      --model Qwen/Qwen3-VL-8B-Instruct --seeds 10 --steps 20
 """
 from __future__ import annotations
@@ -142,7 +142,7 @@ async def _run_episode(
     env = CrafterClassicEnvironment(task_instance)
     wrapper = CrafterEnvironmentWrapper(env, seed=seed)
-    # Policy will auto-detect vision from model name (qwen-vl, qwen2-vl, qwen3-vl)
+    # Policy will auto-detect vision from model name (qwen-vl and qwen3-vl tokens)
     policy = CrafterPolicy(inference_url="synth://inference", model=model)
     await policy.initialize({
         "use_tools": True,
@@ -235,8 +235,8 @@ async def main() -> None:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
         "--model",
-        default="Qwen/Qwen2-VL-7B-Instruct",
-        help="Qwen-VL model name (e.g., Qwen/Qwen2-VL-7B-Instruct, Qwen/Qwen3-VL-8B)",
+        default="Qwen/Qwen3-VL-8B-Instruct",
+        help="Qwen-VL model name (e.g., Qwen/Qwen3-VL-2B-Instruct, Qwen/Qwen3-VL-8B-Instruct)",
     )
     parser.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
     parser.add_argument("--steps", type=int, default=20, help="Max steps per seed")

examples/qwen_vl/run_vision_comparison.sh CHANGED Viewed

@@ -37,13 +37,13 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py \
 echo ""
 echo "======================================"
-echo "2. Running Qwen2-VL-7B (synth-ai)"
+echo "2. Running Qwen3-VL-8B (synth-ai)"
 echo "======================================"
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-    --model Qwen/Qwen2-VL-7B-Instruct \
+    --model Qwen/Qwen3-VL-8B-Instruct \
     --seeds $SEEDS \
     --steps $STEPS \
-    --output-dir "$OUTPUT_DIR/qwen2vl"
+    --output-dir "$OUTPUT_DIR/qwen3vl"
 echo ""
 echo "======================================"
@@ -53,10 +53,9 @@ echo ""
 echo "gpt-5-nano (OpenAI):"
 cat "$OUTPUT_DIR/gpt5nano/gpt5nano_summary.json" | python -m json.tool
 echo ""
-echo "Qwen2-VL-7B (synth-ai):"
-cat "$OUTPUT_DIR/qwen2vl/qwen_vl_summary.json" | python -m json.tool
+echo "Qwen3-VL-8B (synth-ai):"
+cat "$OUTPUT_DIR/qwen3vl/qwen_vl_summary.json" | python -m json.tool
 echo ""
 echo "Frames saved in:"
 echo "  - $OUTPUT_DIR/gpt5nano/gpt5nano_frames/"
-echo "  - $OUTPUT_DIR/qwen2vl/qwen_vl_frames/"
+echo "  - $OUTPUT_DIR/qwen3vl/qwen_vl_frames/"

examples/rl/README.md CHANGED Viewed

@@ -5,8 +5,8 @@ This example trains a reinforcement learning policy on single-step math problems
 ## Quick Commands
 ```bash
-# Serve locally with tracing
-uvx synth-ai serve math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
+# Serve locally with tracing (uvicorn runtime)
+uvx synth-ai deploy --runtime uvicorn math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
 # Modal deployment
 uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
@@ -45,10 +45,10 @@ The task app is defined in `synth_ai/task/apps/math_single_step.py` and register
   - `-0.5` if the tool call omits an answer or uses the wrong tool
   - `-1.0` when no tool call is provided
-Serve locally with tracing to capture trajectories:
+Run locally (uvicorn runtime) with tracing to capture trajectories:
 ```bash
-uvx synth-ai serve math-single-step \
+uvx synth-ai deploy --runtime uvicorn math-single-step \
   --port 8101 \
   --env-file examples/rl/.env \
   --trace traces/math \
@@ -162,7 +162,7 @@ For broader background on Synth task apps, CLI commands, and tracing, see the ne
 uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
-uvx synth-ai serve math-single-step \
+uvx synth-ai deploy --runtime uvicorn math-single-step \
     --port 8101 \
     --env-file examples/rl/.env \
     --trace traces/math \

examples/rl/configs/rl_from_base_qwen.toml CHANGED Viewed

@@ -1,10 +1,15 @@
-type = "rl"
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
 [services]
 task_url = "https://your-math-task.modal.run"
 [model]
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
 [policy]
 model = "Qwen/Qwen3-4B"
@@ -20,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 256
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 128
@@ -33,5 +40,23 @@ learning_rate = 5e-6
 gpu_type = "A10G"
 gpu_count = 4
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
 [tags]
 experiment = "math_single_step"

examples/rl/configs/rl_from_base_qwen17.toml CHANGED Viewed

@@ -1,5 +1,3 @@
-type = "rl"
 [algorithm]
 type = "online"
 method = "policy_gradient"
@@ -10,6 +8,8 @@ task_url = "http://localhost:8101"
 [model]
 base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
 [policy]
 model = "Qwen/Qwen3-1.7B"
@@ -25,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 50
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 2
@@ -63,6 +65,7 @@ health_max_wait_s = 180
 health_interval_ms = 300
 [rollout]
+env_name = "math"
 policy_name = "math-single-step"
 max_turns = 1
 episodes_per_batch = 32  # group_size * batch_size

examples/rl/task_app/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
 ```bash
-uvx synth-ai serve math-single-step --env-file examples/rl/.env --port 8101
+uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
 ```
 If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
@@ -19,4 +19,3 @@ Environment variables:
 - `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
 The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.

examples/rl/task_app/math_single_step.py CHANGED Viewed

@@ -800,7 +800,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MathDataset]:
 def _base_task_info() -> TaskInfo:
     return TaskInfo(
         task={"id": "math_single_step", "name": "Math Single Step", "version": "1.0.0"},
-        environments=["math"],
+        environment="math",
         action_space={
             "type": "tool_call",
             "tools": [
@@ -891,7 +891,7 @@ def provide_task_instances(dataset: MathDataset, seeds: Sequence[int]) -> Iterab
         sample = dataset.sample(split=DEFAULT_SPLIT, index=seed)
         yield TaskInfo(
             task=info.task,
-            environments=info.environments,
+            environment=info.environment,
             action_space=info.action_space,
             observation={**info.observation, "sample_index": sample["index"]},
             dataset={

examples/run_crafter_demo.sh CHANGED Viewed

@@ -4,7 +4,7 @@
 # This script demonstrates a reactive agent in the Crafter environment
 echo "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..."
-echo "Make sure the synth-ai service is running: uvx synth-ai serve"
+echo "Make sure the synth-ai service is running: uvx synth-ai deploy --runtime uvicorn"
 echo ""
-uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
+uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash

examples/sft/README.md CHANGED Viewed

@@ -25,7 +25,7 @@ You can generate traces with the Crafter task app and then export them to SFT JS
 ```bash
 # Serve the task app locally with tracing enabled (example)
-uvx synth-ai serve grpo-crafter \
+uvx synth-ai deploy --runtime uvicorn grpo-crafter \
   --trace traces/v3 \
   --trace-db traces/v3/task_app_traces_<timestamp>.db \
   --port 8001

examples/sft/configs/crafter_fft_qwen0p6b.toml CHANGED Viewed

@@ -1,4 +1,7 @@
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 [job]
 model = "Qwen/Qwen3-0.6B"

examples/sft/configs/crafter_lora_qwen0p6b.toml CHANGED Viewed

@@ -1,4 +1,7 @@
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "qlora"
 [job]
 model = "Qwen/Qwen3-0.6B"

examples/swe/task_app/README.md CHANGED Viewed

@@ -28,13 +28,13 @@ endpoints.
 ## Using the task app
 ```
-uvx synth-ai serve swe-mini --port 8020
+uvx synth-ai deploy --runtime uvicorn swe-mini --port 8020
 ```
 ### Recommended: non-interactive serve + .env
 ```bash
-uvx synth-ai serve swe-mini \
+uvx synth-ai deploy --runtime uvicorn swe-mini \
   --port 8020 \
   --env-file .env \
   --trace traces/v3 \
@@ -60,6 +60,36 @@ Execution is handled by mini-swe's environment classes. Configure execution via
 `SWE_MINI_ENVIRONMENT_CLASS` (`local`, `docker`, `singularity`, …) and pass
 additional keyword arguments with `SWE_MINI_ENVIRONMENT_KWARGS` (JSON).
+### Morph Cloud backend
+The task app now ships with a Morph-powered environment class so you can run
+mini-SWE rollouts in managed sandboxes. When `MORPH_API_KEY` is present the app
+defaults to this backend automatically unless you override
+`SWE_MINI_ENVIRONMENT_CLASS`.
+1. Install the optional dependencies: `pip install "synth-ai[swe]"`.
+2. Export your API key: `export MORPH_API_KEY=...`.
+3. Point the task app at Morph by setting:
+   ```bash
+   export SWE_MINI_ENVIRONMENT_CLASS=morph
+   export SWE_MINI_ENVIRONMENT_KWARGS='{
+     "snapshot_id": "snap_your_pre_baked_swebench_image",
+     "cwd": "/workspace/swebench",
+     "env": {"PIP_PROGRESS_BAR": "off"},
+     "metadata": {"project": "synth-ai", "task": "swe-mini"}
+   }'
+   ```
+   If you do not have a pre-built snapshot, provide `"image_id"` (defaults to
+   `morphvm-minimal`) along with resource hints (`"vcpus"`, `"memory_mb"`,
+   `"disk_mb"`). You can also set `SWE_MINI_MORPH_SNAPSHOT_ID` globally.
+During cleanup the backend deletes the remote workspace and stops the Morph
+instance automatically. All shell commands (including submissions) now execute
+inside the Morph sandbox, enabling RL workflows that require persistent remote
+compute.
 ### Tracing & SFT
 Tracing works the same as Crafter; pass `--trace` / `--trace-db` to the CLI or

examples/swe/task_app/grpo_swe_mini.py CHANGED Viewed

@@ -404,6 +404,10 @@ def _ensure_env_has_task(
         if not instance_id:
             raise ValueError("mini-swe rollout request requires env.config.instance_id")
         config["task"] = dataset.get(instance_id)
+    env_cfg = dict(config.get("environment") or {})
+    if "environment_class" not in env_cfg and os.getenv("MORPH_API_KEY"):
+        env_cfg["environment_class"] = "morph"
+    config["environment"] = env_cfg
     return env_spec.model_copy(update={"config": config})

examples/swe/task_app/hosted/envs/crafter/react_agent.py CHANGED Viewed

@@ -46,7 +46,7 @@ class CrafterReActAgent:
             "- Always return a single tool call: interact_many({actions: [...]})\n"
             "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
             "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-            "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+            "\n"
             "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"

examples/swe/task_app/hosted/envs/mini_swe/environment.py CHANGED Viewed

@@ -18,6 +18,7 @@ from typing import Any
 from minisweagent.environments import get_environment
 from synth_ai.environments.environment.tools import EnvToolCall
+from examples.swe.task_app.morph_backend import MorphSandboxBackend
 from .shared import summarise_history
 from .tools import TOOLS_SCHEMA
@@ -25,8 +26,9 @@ logger = logging.getLogger(__name__)
 def _environment_type_from_config(config: dict[str, Any]) -> str:
+    default = "morph" if os.getenv("MORPH_API_KEY") else "local"
     value = (config or {}).get("environment_class") or os.getenv(
-        "SWE_MINI_ENVIRONMENT_CLASS", "local"
+        "SWE_MINI_ENVIRONMENT_CLASS", default
     )
     return str(value).strip() or "local"
@@ -91,6 +93,7 @@ class MiniSweEnvironmentWrapper:
         self._local_workspace_dir: Path | None = None
         self._remote_workspace: str | None = None
         self._cleanup_workspace = False
+        self._using_morph_backend = False
         if self.environment_type == "local":
             workspace = self._prepare_local_workspace(kwargs)
@@ -117,11 +120,11 @@ class MiniSweEnvironmentWrapper:
             timeout = self.env_config.get("timeout")
             if timeout and "timeout" not in kwargs:
                 kwargs["timeout"] = int(timeout)
-            if self.repo_url and "image" not in kwargs:
+            if self.environment_type in {"docker", "bubblewrap"} and self.repo_url and "image" not in kwargs:
                 image = self.metadata.get("image_name") or os.getenv("SWE_MINI_DOCKER_IMAGE")
                 if image:
                     kwargs["image"] = image
-            if self.environment_type in {"docker", "bubblewrap"}:
+            if self.environment_type in {"docker", "bubblewrap", "morph"}:
                 remote_env = dict(kwargs.get("env") or {})
                 remote_env.setdefault("GIT_TERMINAL_PROMPT", "0")
                 kwargs["env"] = remote_env
@@ -131,13 +134,34 @@ class MiniSweEnvironmentWrapper:
             self.environment_type,
             kwargs,
         )
-        self.env = get_environment(
-            {
-                "environment_class": self.environment_type,
-                **kwargs,
-            },
-            default_type="local",
-        )
+        if self.environment_type == "morph":
+            morph_kwargs = dict(kwargs)
+            image_value = morph_kwargs.pop("image", None)
+            if image_value and "image_id" not in morph_kwargs:
+                morph_kwargs["image_id"] = image_value
+            timeout_value = morph_kwargs.pop("timeout", None)
+            if timeout_value is not None and "startup_timeout" not in morph_kwargs:
+                try:
+                    morph_kwargs["startup_timeout"] = int(timeout_value)
+                except Exception:
+                    logger.warning("Invalid timeout value for morph backend: %r", timeout_value)
+            metadata_override = morph_kwargs.pop("metadata", {}) or {}
+            metadata_payload = {
+                "app": "swe-mini",
+                "instance_id": self.instance_id,
+            }
+            metadata_payload.update({str(k): str(v) for k, v in dict(metadata_override).items()})
+            morph_kwargs["metadata"] = metadata_payload
+            self.env = MorphSandboxBackend(**morph_kwargs)
+            self._using_morph_backend = True
+        else:
+            self.env = get_environment(
+                {
+                    "environment_class": self.environment_type,
+                    **kwargs,
+                },
+                default_type="local",
+            )
         if self.environment_type != "local":
             self._bootstrap_remote_workspace()
@@ -181,6 +205,9 @@ class MiniSweEnvironmentWrapper:
             with contextlib.suppress(Exception):
                 self.env.execute(f"rm -rf {shlex.quote(self._remote_workspace)}")
         self._remote_workspace = None
+        if self._using_morph_backend and hasattr(self.env, "close"):
+            with contextlib.suppress(Exception):
+                self.env.close()
     def _resolve_repo_url(self, metadata: dict[str, Any]) -> str | None:
         candidates = [

examples/swe/task_app/hosted/inference/openai_client.py CHANGED Viewed

@@ -156,13 +156,13 @@ class OpenAIClient:
             keys_preview = sorted(processed_request.keys())
             logger.info(f"Request keys: {keys_preview}")
-        # Final hard-guard for OpenAI: ensure unsupported field is not present
+        # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
         try:
-            if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
+            low_url = url.lower()
+            if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                 processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for OpenAI request")
+                logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-            low_url = url.lower()
             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                 processed_request, dict
             ):

synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl