synth-ai 0.2.16 → 0.2.17 (py3-none-any.whl)

This diff compares publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of synth-ai might be problematic.

Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/qwen_vl/README.md

@@ -55,21 +55,21 @@ uvx synth-ai train --type sft --config configs/vision_sft/crafter_qwen3vl_8b_gpt
 Run Crafter agent using Qwen-VL models via synth-ai's hosted inference.
 
 **Models supported:**
-- `Qwen/Qwen2-VL-7B-Instruct`
-- `Qwen/Qwen2-VL-2B-Instruct`
-- `Qwen/Qwen3-VL-8B` (or any Qwen VL variant)
+- `Qwen/Qwen3-VL-2B-Instruct`
+- `Qwen/Qwen3-VL-4B-Instruct`
+- `Qwen/Qwen3-VL-8B-Instruct` (or any Qwen3 VL variant)
 
 **Usage:**
 ```bash
-# Run with Qwen2-VL-7B
+# Run with Qwen3-VL-4B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-4B-Instruct \
   --seeds 10 \
   --steps 20
 
 # Run with Qwen3-VL-8B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/Qwen3-VL-8B \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --seeds 10 \
   --steps 20
 ```
@@ -113,13 +113,13 @@ uv run python examples/qwen_vl/collect_vision_traces.py \
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect traces with Qwen2-VL via synth
+# Collect traces with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/qwen2vl_vision
+  --output-dir traces/qwen3vl_vision
 ```
 
 **Output:** SQLite database with multimodal traces ready for SFT export.
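The collector's output is a SQLite trace database. As a quick sanity check before filtering, a sketch like the following can list what the tracer actually wrote; the table layout is not documented in this diff, so treat the schema as unknown until inspected:

```python
import sqlite3

# Path from the --output-dir example above; adjust to your run.
conn = sqlite3.connect("traces/qwen3vl_vision/rollouts.db")

# Enumerate whatever tables the tracer created (schema not guaranteed here).
tables = [row[0] for row in conn.execute(
    "SELECT name FROM sqlite_master WHERE type = 'table'"
)]
print("tables:", tables)
conn.close()
```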
@@ -132,8 +132,7 @@ CrafterPolicy automatically detects vision capability from model names:
 - ✅ `gpt-5*` → Vision enabled
 - ✅ `gpt-4o*` → Vision enabled
 - ✅ `*qwen-vl*` → Vision enabled
-- ✅ `*qwen2-vl*` → Vision enabled
-- ✅ `qwen3-vl*` → Vision enabled
+- ✅ `*qwen3-vl*` → Vision enabled
 
 Or set explicitly: `policy.use_vision = True`
 
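The wildcard list above implies simple name matching. An illustrative sketch of that logic, assuming plain substring matching; the actual CrafterPolicy implementation is not shown in this diff and may differ:

```python
# Patterns mirroring the README's list (gpt-5*, gpt-4o*, *qwen-vl*, *qwen3-vl*).
# Hypothetical helper; CrafterPolicy's real matching may differ.
VISION_MODEL_PATTERNS = ("gpt-5", "gpt-4o", "qwen-vl", "qwen3-vl")

def model_supports_vision(model_name: str) -> bool:
    name = model_name.lower()
    return any(pattern in name for pattern in VISION_MODEL_PATTERNS)

assert model_supports_vision("Qwen/Qwen3-VL-8B-Instruct")
assert not model_supports_vision("Qwen/Qwen3-8B")
```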
@@ -151,4 +150,3 @@ Crafter environment provides observations as:
 3. Export to SFT JSONL format (see `vision_sft_rl.txt`)
 4. Train VLM with LoRA (see monorepo SFT configs)
 5. Fine-tune with RL/GRPO
-
examples/qwen_vl/SETUP_COMPLETE.md

@@ -15,10 +15,10 @@ Complete vision-language model (VLM) infrastructure for Crafter with image obser
 
 ### **Configuration Files**
 6. **`configs/eval_gpt5nano_vision.toml`** - Eval config for gpt-5-nano
-7. **`configs/eval_qwen2vl_vision.toml`** - Eval config for Qwen2-VL
+7. **`configs/eval_qwen3vl_vision.toml`** - Eval config for Qwen3-VL
 8. **`configs/eval_gpt4o_mini_vision.toml`** - Eval config for gpt-4o-mini (stronger teacher)
 9. **`configs/filter_vision_sft.toml`** - Filter config for gpt-5-nano traces
-10. **`configs/filter_qwen2vl_sft.toml`** - Filter config for Qwen2-VL traces
+10. **`configs/filter_qwen3vl_sft.toml`** - Filter config for Qwen3-VL traces
 11. **`configs/crafter_vlm_sft_example.toml`** - Example SFT training config
 
 ### **Documentation**
@@ -81,7 +81,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Stores traces to SQLite with base64-encoded images
 - Supports parallel episodes for faster collection
 
-**Config:** `eval_gpt5nano_vision.toml`, `eval_qwen2vl_vision.toml`, etc.
+**Config:** `eval_gpt5nano_vision.toml`, `eval_qwen3vl_vision.toml`, etc.
 
 ### **synth-ai filter** (Quality Filtering)
 - Removes low-quality episodes (too short, errors, loops)
@@ -89,7 +89,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Exports to SFT JSONL format (OpenAI-style messages)
 - Splits into train/val sets
 
-**Config:** `filter_vision_sft.toml`, `filter_qwen2vl_sft.toml`
+**Config:** `filter_vision_sft.toml`, `filter_qwen3vl_sft.toml`
 
 ### **synth-ai train** (Model Training)
 - Trains VLM with LoRA on collected traces
@@ -194,13 +194,13 @@ model = "gpt-4o-mini-2024-07-18" # Stronger teacher
 ### Collect More Episodes
 ```toml
 [eval]
-num_episodes = 500 # Default: 100
+seeds = "0-499" # Default: "0-99"
 ```
 
 ### Change Image Resolution
 ```toml
-[task.config]
-render_size = [128, 128] # Default: [64, 64]
+[eval.env_config]
+env_params = {render_size = [128, 128]} # Default: [64, 64]
 ```
 
 ### Adjust Quality Filters
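The `seeds = "0-499"` string above replaces the old `num_episodes` count. A hypothetical helper showing how an inclusive "start-end" spec expands to concrete seeds (the CLI's real parser may accept richer forms):

```python
def expand_seed_range(spec: str) -> list[int]:
    """Expand an inclusive "start-end" spec such as "0-499"."""
    start, end = (int(part) for part in spec.split("-", 1))
    return list(range(start, end + 1))

assert expand_seed_range("0-99") == list(range(100))
assert len(expand_seed_range("0-499")) == 500
```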
@@ -272,4 +272,3 @@ min_achievements_per_episode = 0
 ---
 
 **Infrastructure ready!** 🎉 Start collecting vision traces and training your VLM! 🚀
-
examples/qwen_vl/VISION_TESTS_COMPLETE.md

@@ -33,7 +33,7 @@ test_vision_inference_multiple_images() # Multiple images per message
 **File:** `tests/integration/cli/test_cli_train_sft_vision.py`
 
 ```python
-test_cli_train_sft_vision_qwen2vl() # Full SFT job submission
+test_cli_train_sft_vision_qwen3vl() # Full SFT job submission
 test_vision_sft_dataset_validation() # Dataset quality checks
 test_cli_train_sft_vision_small_config() # Fast CI test
 ```
@@ -478,7 +478,7 @@ tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_valida
 tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_multiple_images PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_vision_sft_dataset_validation PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_small_config PASSED
-tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen2vl PASSED
+tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen3vl PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_task_app_vision_support PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_small_config PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen3vl4b PASSED
@@ -487,4 +487,3 @@ tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen
 ```
 
 **Status:** 🎯 Production-ready! Complete vision ML pipeline tested from inference through RL training! 🎉
-
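To re-run only the renamed test from the log above, pytest can be driven from Python as well as from the shell; the test IDs are copied verbatim from the PASSED lines:

```python
import pytest

# Select the renamed qwen3vl SFT test plus the fast CI config test.
pytest.main([
    "tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen3vl",
    "tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_small_config",
    "-q",
])
```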
examples/qwen_vl/collect_data_via_cli.md

@@ -5,8 +5,8 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 ## 📋 Overview
 
 **Pipeline:**
-1. `synth-ai serve` → Start Crafter task app with vision support
-2. `synth-ai eval` → Run rollouts with gpt-5-nano or Qwen-VL, collect traces
+1. `synth-ai deploy --runtime=uvicorn` → Start the Crafter task app locally
+2. `synth-ai eval` → Run rollouts with GPT-4o Mini or Qwen3-VL and collect traces
 3. `synth-ai filter` → Filter traces by quality, convert to SFT format
 
 ---
@@ -19,9 +19,10 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
 
 # Serve Crafter task app on localhost:8000
-uvx synth-ai serve \
-  --task-app examples/task_apps/crafter/task_app/synth_envs_hosted/main.py \
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 ```
 
 **Output:**
@@ -32,7 +33,7 @@ uvx synth-ai serve \
 
 ### Option B: Use Hosted Task App (Modal)
 
-If you have a deployed Crafter task app on Modal:
+If you already have a deployed Crafter task app on Modal:
 ```bash
 export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run"
 ```
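Whichever option is used, it helps to confirm the task app is reachable before running rollouts. A minimal standard-library sketch; the `/health` endpoint is the one this file's troubleshooting section probes with curl:

```python
import os
import urllib.request

# TASK_APP_URL as exported above; falls back to the local uvicorn default.
base = os.environ.get("TASK_APP_URL", "http://localhost:8000")
with urllib.request.urlopen(f"{base}/health", timeout=10) as resp:
    print(resp.status, resp.read().decode())
```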
@@ -41,40 +42,36 @@ export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run
 
 ## 🎯 Step 2: Run Eval with Vision Models
 
-### Collect gpt-5-nano Traces (OpenAI)
+### Collect GPT-4o-mini Vision Traces (OpenAI)
 
 Create eval config: `examples/qwen_vl/configs/eval_gpt5nano_vision.toml`
 
 ```toml
-# Evaluation config for gpt-5-nano with vision
+# Evaluation config for gpt-4o-mini (vision)
+# Legacy filename kept for convenience
 [eval]
-model = "gpt-5-nano"
-provider = "openai" # Use OpenAI API
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000" # or your hosted URL
-
-# Vision settings
-use_vision = true
-image_only_mode = false # Include both text + images
-
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "0-99" # Seeds 0 through 99
-
-# Sampling
+model = "gpt-4o-mini-2024-07-18"
+seeds = "0-99"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "openai"
+model = "gpt-4o-mini-2024-07-18"
 temperature = 0.7
 max_tokens = 512
-
-# Trace collection
-collect_traces = true
-trace_db = "traces/gpt5nano_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -83,15 +80,15 @@ export OPENAI_API_KEY="sk-..."
 
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --output-dir traces/gpt5nano_vision
+  --trace-db traces/gpt4omini_vision/rollouts.db
 ```
 
 **Expected output:**
 ```
-🎮 Running evaluation: gpt-5-nano on crafter
+🎮 Running evaluation: gpt-4o-mini on crafter
 📊 Episodes: 100, Max steps: 50
 🔍 Vision: enabled (auto-detected from model name)
-📦 Collecting traces to: traces/gpt5nano_vision/rollouts.db
+📦 Collecting traces to: traces/gpt4omini_vision/rollouts.db
 
 Episode 0/100 (seed=0): 50 steps, 3 achievements ✓
 Episode 1/100 (seed=1): 48 steps, 2 achievements ✓
@@ -103,45 +100,40 @@ Episode 99/100 (seed=99): 50 steps, 3 achievements ✓
 Total episodes: 100
 Total steps: 4,923
 Avg achievements: 2.8
-Traces saved to: traces/gpt5nano_vision/rollouts.db
+Traces saved to: traces/gpt4omini_vision/rollouts.db
 ```
 
 ---
 
-### Collect Qwen-VL Traces (synth-ai hosted)
+### Collect Qwen3-VL Traces (Synth hosted inference)
 
-Create eval config: `examples/qwen_vl/configs/eval_qwen2vl_vision.toml`
+Create eval config: `examples/qwen_vl/configs/eval_qwen3vl_vision.toml`
 
 ```toml
-# Evaluation config for Qwen2-VL via synth-ai
+# Evaluation config for Qwen3-VL vision rollouts
 [eval]
-model = "Qwen/Qwen2-VL-7B-Instruct"
-provider = "synth" # Use synth-ai hosted inference
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000"
-
-# Vision settings (auto-detected from model name)
-use_vision = true
-image_only_mode = false
-
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "0-99"
-
-# Sampling
+model = "Qwen/Qwen3-VL-8B-Instruct"
+seeds = "100-199"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-VL-8B-Instruct"
 temperature = 0.7
 max_tokens = 512
-
-# Trace collection
-collect_traces = true
-trace_db = "traces/qwen2vl_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -149,8 +141,8 @@ environment = "crafter-classic"
 export SYNTH_API_KEY="sk_live_..."
 
 uvx synth-ai eval \
-  --config examples/qwen_vl/configs/eval_qwen2vl_vision.toml \
-  --output-dir traces/qwen2vl_vision
+  --config examples/qwen_vl/configs/eval_qwen3vl_vision.toml \
+  --trace-db traces/qwen3vl_vision/rollouts.db
 ```
 
 ---
@@ -169,8 +161,8 @@ Create `examples/qwen_vl/configs/filter_vision_sft.toml`:
 ```toml
 # Filter vision traces for SFT training
 [filter]
-input_db = "traces/gpt5nano_vision/rollouts.db"
-output_dir = "traces/gpt5nano_vision/sft"
+input_db = "traces/gpt4omini_vision/rollouts.db"
+output_dir = "traces/gpt4omini_vision/sft"
 
 # Quality filters
 min_steps_per_episode = 5
@@ -205,7 +197,7 @@ uvx synth-ai filter \
 
 **Expected output:**
 ```
-📂 Loading traces from traces/gpt5nano_vision/rollouts.db
+📂 Loading traces from traces/gpt4omini_vision/rollouts.db
 Total episodes: 100
 Total steps: 4,923
 
@@ -222,8 +214,8 @@ uvx synth-ai filter \
 ✓ Final dataset: 4,190 samples
 
 ✂️ Splitting train/val (90%/10%)...
-✓ Train: 3,771 samples → traces/gpt5nano_vision/sft/train.jsonl
-✓ Val: 419 samples → traces/gpt5nano_vision/sft/val.jsonl
+✓ Train: 3,771 samples → traces/gpt4omini_vision/sft/train.jsonl
+✓ Val: 419 samples → traces/gpt4omini_vision/sft/val.jsonl
 
 ✅ Filter complete!
 ```
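A quick way to confirm the 90/10 split reported above actually landed on disk (paths taken from the filter output):

```python
# Count samples in each JSONL split and report the validation fraction.
with open("traces/gpt4omini_vision/sft/train.jsonl") as f:
    train = sum(1 for _ in f)
with open("traces/gpt4omini_vision/sft/val.jsonl") as f:
    val = sum(1 for _ in f)
print(train, val, f"val fraction: {val / (train + val):.2f}")
```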
@@ -236,7 +228,7 @@ Check the SFT JSONL format:
 
 ```bash
 # Inspect first sample
-head -1 traces/gpt5nano_vision/sft/train.jsonl | jq .
+head -1 traces/gpt4omini_vision/sft/train.jsonl | jq .
 ```
 
 **Expected format:**
@@ -282,7 +274,7 @@ head -1 traces/gpt5nano_vision/sft/train.jsonl | jq .
     "step": 12,
     "seed": 42,
     "has_image": true,
-    "model": "gpt-5-nano"
+    "model": "gpt-4o-mini-2024-07-18"
   }
 }
 ```
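Beyond eyeballing with jq, a small sketch for checking the first exported sample programmatically; the nesting of the metadata block is assumed from the truncated sample above:

```python
import json

# Read the first SFT sample and surface the fields shown in the sample above.
with open("traces/gpt4omini_vision/sft/train.jsonl") as f:
    first = json.loads(f.readline())

meta = first.get("metadata", {})  # "metadata" key is an assumption
print(meta.get("model"), meta.get("has_image"), meta.get("seed"))
```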
@@ -301,8 +293,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset traces/gpt5nano_vision/sft/train.jsonl \
-  --eval-dataset traces/gpt5nano_vision/sft/val.jsonl \
+  --dataset traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -313,15 +305,16 @@ uvx synth-ai train \
 ```bash
 # Terminal 1: Serve task app
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
-uvx synth-ai serve \
-  --task-app examples/task_apps/crafter/task_app/synth_envs_hosted/main.py \
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 
 # Terminal 2: Collect traces
 export OPENAI_API_KEY="sk-..."
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --output-dir traces/gpt5nano_vision
+  --trace-db traces/gpt4omini_vision/rollouts.db
 
 # Terminal 2: Filter and export
 uvx synth-ai filter \
@@ -333,8 +326,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt5nano_vision/sft/train.jsonl \
-  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt5nano_vision/sft/val.jsonl \
+  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -345,7 +338,7 @@ uvx synth-ai train \
 | Step | Duration | Cost | Notes |
 |------|----------|------|-------|
 | 1. Serve | Continuous | Free | Local or Modal |
-| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-5-nano |
+| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-4o-mini |
 | 3. Filter | < 5 min | Free | Local processing |
 | 4. SFT (2 epochs) | 2-4 hrs | ~$21 | 2x H200 on Modal |
 
@@ -364,12 +357,12 @@ uvx synth-ai eval --config configs/eval_gpt5nano_vision.toml
 # Collect from gpt-4o-mini (stronger teacher)
 uvx synth-ai eval --config configs/eval_gpt4o_mini_vision.toml
 
-# Collect from Qwen2-VL (for comparison)
-uvx synth-ai eval --config configs/eval_qwen2vl_vision.toml
+# Collect from Qwen3-VL (for comparison)
+uvx synth-ai eval --config configs/eval_qwen3vl_vision.toml
 
 # Merge and filter all traces
 uvx synth-ai filter \
-  --input-dbs traces/gpt5nano_vision/rollouts.db,traces/gpt4o_mini_vision/rollouts.db \
+  --input-dbs traces/gpt4omini_vision/rollouts.db,traces/qwen3vl_vision/rollouts.db \
   --output-dir traces/merged_vision/sft \
   --config configs/filter_vision_sft.toml
 ```
@@ -402,7 +395,7 @@ curl http://localhost:8000/health
 ```
 
 ### Traces not saving
-Ensure `collect_traces = true` in eval config and `trace_db` path is writable.
+Ensure you pass `--trace-db` (or accept the default) so traces land in a SQLite/Turso database.
 
 ### Filter removes all samples
 Lower quality thresholds:
@@ -420,4 +413,3 @@ min_achievements_per_episode = 0 # Allow episodes with no achievements
 - **Eval Config Schema:** `synth-ai eval --help`
 - **Filter Config Schema:** `synth-ai filter --help`
 - **Full Pipeline:** See `/Users/joshpurtell/Documents/GitHub/monorepo/vision_sft_rl.txt`
-
examples/qwen_vl/collect_vision_traces.py

@@ -22,13 +22,13 @@ Usage:
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect with Qwen2-VL via synth
+# Collect with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/qwen2vl_vision
+  --output-dir traces/qwen3vl_vision
 """
 
 from __future__ import annotations
@@ -333,7 +333,7 @@ async def main() -> None:
     parser.add_argument(
         "--model",
         required=True,
-        help="Model name (e.g., gpt-5-nano, Qwen/Qwen2-VL-7B-Instruct)",
+        help="Model name (e.g., gpt-5-nano, Qwen/Qwen3-VL-8B-Instruct)",
     )
     parser.add_argument(
         "--provider",
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml

@@ -1,19 +1,9 @@
-# Crafter RL with Vision - Qwen3-VL-4B
-#
-# This configuration runs online RL (GRPO/GSPO) with a vision-language model
-# using the same Crafter task app that generates image observations for SFT data.
-#
-# Model: Qwen/Qwen3-VL-4B (smaller, faster for testing)
-# Task App: grpo-crafter-task-app (Modal deployed, supports vision)
-# Policy: crafter-react with use_vision=true, image_only_mode=true
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
 variety = "gspo"
 
 [services]
-# Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
 task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
 [compute]
@@ -30,8 +20,6 @@ tensor_parallel = 1
 [vllm]
 tensor_parallel_size = 1
 max_model_len = 4096
-# Vision-specific settings
-limit_mm_per_prompt = { "image": 1 } # Max 1 image per prompt
 
 [reference]
 placement = "none"
@@ -40,88 +28,83 @@ placement = "none"
 base = "Qwen/Qwen3-VL-4B-Instruct"
 trainer_mode = "lora"
 label = "crafter-rl-vision-qwen3vl4b"
-supports_vision = true # Enable vision support
+supports_vision = true
 
 [lora]
 r = 16
 alpha = 32
 dropout = 0.05
-target_modules = ["all-linear"]
-# Note: will automatically include mm_projector for vision models
+target_modules = [ "all-linear",]
 
 [rollout]
 env_name = "crafter"
-max_turns = 10 # 10 steps per episode for faster testing
+max_turns = 10
 episodes_per_batch = 2
 policy_name = "crafter-react"
-max_concurrent_rollouts = 4 # Lower for vision models (memory)
+max_concurrent_rollouts = 4
 batches_per_step = 2
-ops = ["agent", "env"]
-
-[rollout.env_config]
-difficulty = "easy"
-
-[rollout.env_config.step_rewards]
-enabled = true
-mode = "decision_stepwise"
-strategy = "consistent"
-indicator_lambda = 1.0
-step_beta = 0.0
-
-[rollout.policy_config]
-# Vision-specific policy settings
-use_vision = true # Enable vision input
-image_only_mode = true # Use only images, no text observations
-temperature = 0.6 # Slightly higher for exploration
-top_p = 0.95
-max_tokens = 512
-max_llm_calls = 10
+ops = [ "agent", "env",]
 
 [evaluation]
-instances = 8 # Lower for faster vision evals
+instances = 8
 every_n_iters = 5
-seeds = [0, 1, 2, 3, 4, 5, 6, 7]
+seeds = [ 0, 1, 2, 3, 4, 5, 6, 7,]
 
 [training]
 num_epochs = 1
-iterations_per_epoch = 3 # Shorter for integration test
+iterations_per_epoch = 3
 gradient_accumulation_steps = 2
 max_accumulated_minibatch = 1
 max_turns = 10
-batch_size = 2 # Smaller for vision models
+batch_size = 2
 group_size = 2
 learning_rate = 5e-5
 log_interval = 1
 weight_sync_interval = 1
 event_rewards_kind = "unique"
-async_semaphore_max = 2 # Lower concurrency for vision
-
-# Enable dense decision rewards
+async_semaphore_max = 2
 step_rewards_enabled = true
 step_rewards_mode = "decision_stepwise"
 step_rewards_indicator_lambda = 1.0
 step_rewards_beta = 0.0
 step_rewards_strategy = "consistent"
+max_images_per_message = 1
+supports_vision = true
+
+[tags]
+experiment = "crafter_rl_vision_qwen3vl4b"
+task = "crafter_agent_vision"
+model_size = "4b"
+vision_enabled = true
+image_only = true
 
-# Vision-specific training settings
-max_images_per_message = 1 # Limit images for memory
-supports_vision = true # Enable vision training path
+[vllm.limit_mm_per_prompt]
+image = 1
+
+[rollout.env_config]
+difficulty = "easy"
+
+[rollout.policy_config]
+use_vision = true
+image_only_mode = true
+temperature = 0.6
+top_p = 0.95
+max_tokens = 512
+max_llm_calls = 10
 
 [training.weight_sync]
 enable = true
-targets = ["policy"]
+targets = [ "policy",]
 mode = "direct"
 direct = true
 verify_every_k = 0
 
-[judge]
-type = "env" # Use environment rewards only (simpler for testing)
+[judge.options]
 timeout_s = 30
 
-[tags]
-experiment = "crafter_rl_vision_qwen3vl4b"
-task = "crafter_agent_vision"
-model_size = "4b"
-vision_enabled = true
-image_only = true
-
+[rollout.env_config.step_rewards]
+enabled = true
+mode = "decision_stepwise"
+strategy = "consistent"
+indicator_lambda = 1.0
+step_beta = 0.0
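Since this hunk mostly regenerates the TOML (hand-written comments dropped, tables reordered), a parse check is a cheap way to confirm the new layout still carries the vision settings. The keys below are read off the + side of the hunk; the [model] table name is inferred from context, and tomllib requires Python 3.11+:

```python
import tomllib

# Parse the regenerated config and spot-check vision-related keys.
with open("examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml", "rb") as f:
    cfg = tomllib.load(f)

assert cfg["model"]["supports_vision"] is True  # [model] table name inferred
assert cfg["vllm"]["limit_mm_per_prompt"]["image"] == 1
assert cfg["rollout"]["policy_config"]["image_only_mode"] is True
assert cfg["training"]["max_images_per_message"] == 1
```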
examples/qwen_vl/configs/crafter_vlm_sft_example.toml

@@ -7,7 +7,7 @@ method = "sft"
 variety = "lora"
 
 [job]
-model = "Qwen/Qwen2-VL-7B-Instruct" # or Qwen/Qwen3-VL-8B
+model = "Qwen/Qwen3-VL-8B-Instruct" # or Qwen/Qwen3-VL-4B-Instruct
 # Dataset from collect_vision_traces.py → export_to_sft.py
 data = "traces/gpt5nano_vision/train.jsonl"
 
@@ -57,4 +57,3 @@ task = "crafter"
 modality = "vision"
 data_source = "collected_traces"
 model_family = "qwen_vl"
-