synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of synth-ai has been flagged as potentially problematic.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
examples/blog_posts/pokemon_vl/README.md
@@ -0,0 +1,98 @@
+ # Pokémon VL: Vision-Language RL Pipeline
+
+ This playbook demonstrates end-to-end vision-language reinforcement learning on Pokémon Red using Synth AI's CLI tools. We follow the eval → collect data → SFT → RL → eval pipeline, but with vision models throughout.
+
+ ## Overview
+
+ **Model**: Qwen3-VL-4B-Instruct (4B parameter vision-language model via Synth API)
+ **Environment**: Pokémon Red (Game Boy emulation with vision support)
+ **Benchmark**: Pallet Town progression task (leave bedroom → get starter → win first battle)
+
+ ## Pipeline Steps
+
+ 1. **Deploy Task App** - Host the Pokémon Red environment
+ 2. **Collect Vision Rollouts** - Generate high-quality demonstrations using Qwen3-VL
+ 3. **Filter Dataset** - Extract successful trajectories for supervised fine-tuning
+ 4. **Fine-Tune Qwen3-4B VL** - Train vision-language model on filtered data
+ 5. **Vision-Language RL** - Bootstrap RL training from SFT checkpoint
+ 6. **Final Evaluation** - Compare SFT and RL performance
+
+ ## Prerequisites
+
+ ```bash
+ # Install dependencies
+ uv pip install -e .
+
+ # Setup authentication
+ uvx synth-ai setup
+
+ # Copy environment template
+ cp examples/blog_posts/pokemon_vl/.env.example .env
+ ```
+
+ ## Quick Start
+
+ ```bash
+ # Export trace database path
+ export POKEMON_VL_TRACE_DB=traces/v3/pokemon_vl_blog.db
+
+ # 1. Deploy task app
+ uvx synth-ai deploy pokemon_red --runtime modal --name pokemon-vl-blog --env-file .env
+
+ # 2. Collect vision rollouts with Qwen3-VL
+ uvx synth-ai eval pokemon_red --config examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml --trace-db "${POKEMON_VL_TRACE_DB}"
+
+ # 3. Filter high-reward trajectories
+ uvx synth-ai filter --config examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml
+
+ # 4. Fine-tune Qwen3-4B VL
+ uvx synth-ai train --type sft --config examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml --env-file .env --poll
+
+ # 5. RL from SFT checkpoint (first set model.source in train_rl_from_sft.toml to the SFT job ID)
+ uvx synth-ai train --type rl --config examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml --env-file .env --poll
+
+ # 6. Evaluate final RL model
+ uvx synth-ai eval pokemon_red --config examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml --trace-db "${POKEMON_VL_TRACE_DB}"
+ ```
+
+ ## Vision Features
+
+ - **Full Game Boy Frames**: Base64-encoded PNG screenshots (160x144 resolution)
+ - **Vision-Only Mode**: Pure image understanding without text state
+ - **Vision + Text Mode**: Combined visual and structured state information
+ - **Efficient Action Batching**: `execute_sequence` tool for 5-10 actions per inference call
+
+ ## Expected Results
+
+ | Stage | Model | Mean Reward | Success Rate | Best Achievement |
+ |-------|-------|-------------|--------------|------------------|
+ | Initial | Qwen3-VL (vision) | ~150 | 60% | Win first battle |
+ | SFT | Qwen3-4B VL | ~200 | 75% | Win first battle + explore |
+ | RL | Qwen3-4B VL + RL | ~350 | 85% | Complete Pallet Town |
+
+ ## Files
+
+ - `configs/` - All TOML configuration files
+ - `ft_data/` - Filtered datasets for fine-tuning
+ - `.env.example` - Environment variables template
+
+ ## Vision Model Configuration
+
+ The vision models receive:
+ - **Input**: Game Boy screenshot + optional structured state (position, HP, party, etc.)
+ - **Output**: Sequence of button presses via the `execute_sequence` tool (a sketch of one call follows this list)
+ - **Action Space**: UP, DOWN, LEFT, RIGHT, A, B, START, SELECT with frame counts
+
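The `execute_sequence` argument schema lives in the task app and is not shown in this diff; as a rough sketch, one batched call might look like the following (the field names `actions`, `button`, and `frames` are illustrative assumptions, not the app's confirmed API):

```python
# Hypothetical payload for one `execute_sequence` tool call: a batch of
# 5-10 button presses, each held for some number of frames.
tool_call = {
    "name": "execute_sequence",
    "arguments": {
        "actions": [
            {"button": "UP", "frames": 15},    # walk toward the door
            {"button": "UP", "frames": 15},
            {"button": "A", "frames": 2},      # interact / advance text
            {"button": "B", "frames": 2},      # back out of any menu
            {"button": "RIGHT", "frames": 10},
        ],
    },
}
```

Batching several presses per call is what keeps an episode within the `max_llm_calls` budgets used in the eval configs below.
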
+ ## Reward Function
+
+ Dense rewards for Pallet Town progression:
+ - Leave bedroom (+20)
+ - Exit house (+30)
+ - Find Oak's lab (+40)
+ - Talk to Oak (+50)
+ - Get starter Pokémon (+100)
+ - Enter battle (+75)
+ - Deal damage (+50 per 10HP)
+ - Win battle (+150)
+
+ Total possible: ~700 points
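
As a sanity check on the "~700 points" figure: the fixed milestones sum to 465, so the balance comes from the damage bonus. A rough tally, assuming on the order of 50 HP dealt across the first battle (an assumption; actual HP varies by matchup):

```python
# Fixed Pallet Town milestone rewards from the list above
milestones = [20, 30, 40, 50, 100, 75, 150]
fixed_total = sum(milestones)  # 465

# Damage bonus: +50 per 10 HP dealt; ~50 HP is an assumed figure
damage_bonus = 50 * (50 // 10)  # 250

print(fixed_total + damage_bonus)  # 715, in line with the quoted ~700
```
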
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml
@@ -0,0 +1,26 @@
+ [eval]
+ app_id = "pokemon_red"
+ task_app_url = "http://127.0.0.1:8914"
+ model = "gpt-5-nano"
+ seeds = [0] # Single seed for testing
+ max_turns = 10 # 10 LLM calls per episode to allow more progress
+ concurrency = 1 # Run 1 rollout
+ env_name = "pokemon_red"
+ policy_name = "pokemon_vl_qwen3_vl" # Reuse policy config, will override model
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "openai" # Use OpenAI API for gpt-5-nano
+ model = "gpt-5-nano"
+ inference_url = "https://api.openai.com/v1"
+ temperature = 0.7
+ top_p = 0.95
+ max_tokens = 512
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 10
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 100 # Allow time to achieve milestones
+
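For reference, the `[eval.policy_config]` above maps onto an ordinary OpenAI-style chat-completions request. The sketch below is illustrative, not the task app's actual client; the vision message shape follows the public chat-completions convention, and whether `gpt-5-nano` accepts `max_tokens` under that exact field name is not confirmed by this diff:

```python
import os

import requests

cfg = {  # values copied from [eval.policy_config] above
    "model": "gpt-5-nano",
    "inference_url": "https://api.openai.com/v1",
    "temperature": 0.7,
    "top_p": 0.95,
    "max_tokens": 512,
}

payload = {
    "model": cfg["model"],
    "temperature": cfg["temperature"],
    "top_p": cfg["top_p"],
    "max_tokens": cfg["max_tokens"],
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Choose the next button presses."},
            # use_vision = true: the current frame travels as a data URL
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,<frame>"}},
        ],
    }],
}

resp = requests.post(
    f"{cfg['inference_url']}/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
    json=payload,
    timeout=120,
)
resp.raise_for_status()
```
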
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml
@@ -0,0 +1,27 @@
+ [eval]
+ app_id = "pokemon_red"
+ task_app_url = "http://127.0.0.1:8914"
+ model = "Qwen/Qwen3-VL-30B-A3B-Thinking" # Larger thinking variant - needs more time to load
+ seeds = [10, 11] # 2 seeds for quick testing
+ max_turns = 10 # 10 LLM calls per episode to allow more progress
+ concurrency = 2 # Run 2 rollouts in parallel
+ env_name = "pokemon_red"
+ policy_name = "pokemon_vl_qwen3_vl"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "synth" # Use Synth internal API for vision models
+ model = "Qwen/Qwen3-VL-30B-A3B-Thinking" # Larger thinking variant - needs more time to load
+ inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run/chat/completions"
+ temperature = 1.0 # Higher temperature to encourage exploration
+ top_p = 0.95
+ max_tokens = 2048 # Reduced to avoid token budget issues
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 10
+ thinking_mode = "think" # Enable thinking/reasoning mode
+ thinking_budget = 3072 # Increased token budget for reasoning
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 100 # Increased from 3 to allow time to achieve milestones
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml
@@ -0,0 +1,24 @@
+ [eval]
+ app_id = "pokemon_red"
+ task_app_url = "http://127.0.0.1:8914"
+ model = "fft:REPLACE-WITH-RL-JOB-ID" # Update with final RL job ID
+ seeds = [100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
+ max_turns = 15 # Allow more steps for trained model
+ concurrency = 3
+ env_name = "pokemon_red"
+ policy_name = "pokemon_vl_rl_final"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "synth"
+ model = "fft:REPLACE-WITH-RL-JOB-ID" # Update with final RL job ID
+ temperature = 0.1 # Lower temperature for evaluation
+ top_p = 0.9
+ max_tokens = 4096
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 15
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 15
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml
@@ -0,0 +1,10 @@
+ # Filter high-quality vision-language rollouts for SFT training
+ # Assumes traces stored in pokemon_vl_blog.db via eval commands
+
+ [filter]
+ db = "traces/v3/pokemon_vl_blog.db"
+ output = "examples/blog_posts/pokemon_vl/ft_data/pokemon_vl_high_reward.jsonl"
+ min_official_score = 0.3 # Require at least 30% completion (Pallet Town progression)
+ models = ["Qwen/Qwen3-VL-4B-Instruct"] # Vision models used for rollouts
+ shuffle = true
+ shuffle_seed = 42
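
In plain terms, this config asks `synth-ai filter` to keep rollouts from the listed model that reached at least 30% of the Pallet Town progression, then shuffle them with a fixed seed. A sketch of that selection rule follows; the record fields (`model`, `official_score`) are assumptions about the trace schema, not the CLI's internals:

```python
import random

MIN_SCORE = 0.3
MODELS = {"Qwen/Qwen3-VL-4B-Instruct"}

def select_for_sft(trajectories: list[dict]) -> list[dict]:
    """Keep high-scoring rollouts from the configured models, then shuffle."""
    kept = [
        t for t in trajectories
        if t.get("model") in MODELS and t.get("official_score", 0.0) >= MIN_SCORE
    ]
    random.Random(42).shuffle(kept)  # shuffle = true, shuffle_seed = 42
    return kept
```
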
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml
@@ -0,0 +1,43 @@
+ # Vision-Language RL: Continue training Qwen3-4B VL from SFT checkpoint
+ # Update task_url with deployed Modal task app URL
+ # Set model.source to the SFT job id from `uvx synth-ai train --type sft`
+
+ type = "rl"
+
+ [services]
+ task_url = "http://127.0.0.1:8914"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 8
+
+ [topology]
+ gpus_for_vllm = 4
+ gpus_for_training = 3
+ gpus_for_ref = 1
+
+ [vllm]
+ tensor_parallel_size = 4
+
+ [model]
+ source = "fft:REPLACE-WITH-SFT-JOB-ID" # Update with actual SFT job ID
+ label = "pokemon_vl_rl_blog"
+ supports_vision = true
+
+ [rollout]
+ max_turns = 10
+ episodes_per_batch = 64
+ task_app_origin_rewards_only = true
+
+ [evaluation]
+ instances = 100
+ every_n_iters = 20
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
+
+ [training]
+ log_interval = 1
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ weight_sync_interval = 1
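
Two details are easy to miss here: the long `[evaluation].seeds` literal is just the integers 0 through 99, and the `[topology]` split accounts for every GPU in `[compute]`. A quick consistency check (whether the trainer actually requires the split to sum to `gpu_count` is an assumption):

```python
seeds = list(range(100))  # equivalent to the seeds literal above

gpu_count = 8
gpus_for_vllm, gpus_for_training, gpus_for_ref = 4, 3, 1
assert gpus_for_vllm + gpus_for_training + gpus_for_ref == gpu_count
assert gpus_for_vllm == 4  # matches vllm.tensor_parallel_size
```
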
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml
@@ -0,0 +1,40 @@
+ # Vision-Language Supervised Fine-Tuning: Qwen3-4B VL on filtered Pokémon rollouts
+ # Update the `data` path once `uvx synth-ai filter` produces your JSONL
+
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "fft"
+
+ [job]
+ model = "Qwen/Qwen3-VL-4B-Instruct" # Vision-enabled Qwen3-VL model
+ data = "../ft_data/pokemon_vl_high_reward.jsonl"
+ poll_seconds = 1800
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 4
+ nodes = 1
+
+ [data.topology]
+ container_count = 4
+
+ [training]
+ mode = "full_finetune"
+ use_qlora = false
+
+ [hyperparameters]
+ n_epochs = 2
+ world_size = 4
+ sequence_length = 4096 # Longer for vision tokens + text
+ per_device_batch = 2
+ gradient_accumulation_steps = 64
+ learning_rate = 8e-6
+ warmup_ratio = 0.03
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 3
+ fsdp = false
+ bf16 = true
+ fp16 = false
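
Under the usual data-parallel convention (assumed here; the trainer's exact batching semantics are not shown in this diff), these hyperparameters imply a global batch of 512 sequences per optimizer step:

```python
per_device_batch = 2
gradient_accumulation_steps = 64
world_size = 4  # one rank per H100

effective_batch = per_device_batch * gradient_accumulation_steps * world_size
print(effective_batch)  # 512 sequences of up to 4096 tokens per step
```
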
examples/blog_posts/pokemon_vl/extract_images.py
@@ -0,0 +1,239 @@
+ #!/usr/bin/env python3
+ """Extract images from pokemon_vl trace database or trace JSON file and save to images_gpt5 directory.
+
+ Usage:
+     # From trace database:
+     python extract_images.py --trace-db traces/v3/pokemon_vl_gpt5nano.db
+
+     # From trace JSON file:
+     python extract_images.py --trace-json trace.json
+ """
+
+ import argparse
+ import base64
+ import json
+ import sqlite3
+ from pathlib import Path
+ from typing import Any
+
+ from synth_ai.tracing_v3.trace_utils import load_session_trace
+
+
+ def extract_image_urls_from_content(content: Any) -> list[str]:
+     """Extract image URLs from message content."""
+     urls = []
+
+     if isinstance(content, list):
+         for part in content:
+             if isinstance(part, dict):
+                 if part.get("type") == "image_url" and "image_url" in part:
+                     url = part["image_url"].get("url")
+                     if isinstance(url, str) and url.startswith("data:image"):
+                         urls.append(url)
+                 elif part.get("type") == "image":
+                     img = part.get("image")
+                     if isinstance(img, str) and img.startswith("data:image"):
+                         urls.append(img)
+     elif isinstance(content, str):
+         # Check if it's a JSON string
+         try:
+             parsed = json.loads(content)
+             return extract_image_urls_from_content(parsed)
+         except json.JSONDecodeError:
+             pass
+
+     return urls
+
+
+ def extract_state_info_from_message(message: dict[str, Any]) -> dict[str, Any]:
+     """Extract state info from message metadata or content."""
+     metadata = message.get("metadata", {})
+     state = {}
+
+     # Try to get state from metadata
+     if "system_state_before" in metadata:
+         state_before = metadata["system_state_before"]
+         if isinstance(state_before, dict):
+             obs = state_before.get("obs", {})
+             state.update({
+                 "position": obs.get("position", "?"),
+                 "map_id": obs.get("map_id", "?"),
+                 "player_x": obs.get("player_x", "?"),
+                 "player_y": obs.get("player_y", "?"),
+                 "text_box_active": obs.get("text_box_active", False),
+             })
+
+     # Try to extract from content text
+     content = message.get("content", "")
+     if isinstance(content, str) and "position" in content:
+         # Look for a repr'd state dict after the "State summary:" marker
+         if "State summary:" in content:
+             import ast
+
+             remainder = content.split("State summary:", 1)[1]
+             # The summary may sit inside a quoted string; keep the text before
+             # the first quote, then parse it as a Python literal.
+             candidate = remainder.split("'")[0] if "'" in remainder else remainder
+             try:
+                 state_dict = ast.literal_eval(candidate.strip())
+                 if isinstance(state_dict, dict):
+                     state.update({
+                         "position": state_dict.get("position", "?"),
+                         "map_id": state_dict.get("map_id", "?"),
+                         "player_x": state_dict.get("player_x", "?"),
+                         "player_y": state_dict.get("player_y", "?"),
+                         "text_box_active": state_dict.get("text_box_active", False),
+                     })
+             except (ValueError, SyntaxError):
+                 pass
+
+     return state
+
+
+ def extract_images_from_trace_dict(trace: dict[str, Any], output_dir: Path):
+     """Extract images from a trace dictionary."""
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     # Get messages from trace
+     messages = trace.get("markov_blanket_message_history", []) or trace.get("messages", [])
+
+     if not messages:
+         print(" No messages found in trace")
+         return 0
+
+     print(f" Found {len(messages)} messages")
+
+     image_count = 0
+     step_idx = 0
+     for msg_idx, msg in enumerate(messages):
+         # Extract images from message content
+         content = msg.get("content", "")
+         image_urls = extract_image_urls_from_content(content)
+
+         if not image_urls:
+             continue
+
+         # Extract state info for filename
+         state = extract_state_info_from_message(msg)
+
+         for img_idx, img_url in enumerate(image_urls):
+             # Extract base64 data
+             if img_url.startswith("data:image"):
+                 # Format: data:image/png;base64,<data>
+                 parts = img_url.split(",", 1)
+                 if len(parts) != 2:
+                     continue
+
+                 b64_data = parts[1]
+                 try:
+                     img_data = base64.b64decode(b64_data)
+
+                     # Create filename
+                     pos_str = f"{state.get('map_id', '?')}_{state.get('player_x', '?')},{state.get('player_y', '?')}"
+                     textbox_str = "True" if state.get("text_box_active") else "False"
+                     filename = f"step_{step_idx:03d}_pos_{pos_str}_textbox_{textbox_str}.png"
+
+                     filepath = output_dir / filename
+                     filepath.write_bytes(img_data)
+
+                     print(f" Saved: {filename}")
+                     image_count += 1
+                     step_idx += 1
+                 except Exception as e:
+                     print(f" Error decoding image: {e}")
+                     continue
+
+     return image_count
+
+
+ def extract_images_from_trace_db(trace_db: str, output_dir: Path, model_filter: str | None = None):
+     """Extract images from trace database and save to output directory."""
+     conn = sqlite3.connect(trace_db)
+     conn.row_factory = sqlite3.Row
+
+     # Get all session IDs
+     query = "SELECT session_id, metadata FROM session_traces"
+     if model_filter:
+         query += " WHERE metadata LIKE ?"
+         params = (f'%{model_filter}%',)
+     else:
+         params = ()
+
+     rows = conn.execute(query, params).fetchall()
+
+     if not rows:
+         print(f"No traces found in {trace_db}")
+         return
+
+     print(f"Found {len(rows)} trace(s)")
+
+     total_images = 0
+     for row in rows:
+         session_id = row["session_id"]
+         print(f"\nProcessing session: {session_id}")
+
+         try:
+             trace = load_session_trace(conn, session_id)
+         except Exception as e:
+             print(f" Error loading trace: {e}")
+             continue
+
+         count = extract_images_from_trace_dict(trace, output_dir)
+         total_images += count
+
+     conn.close()
+     print(f"\n✓ Extracted {total_images} images to {output_dir}/")
+
+
+ def extract_images_from_trace_json(trace_json: Path, output_dir: Path):
+     """Extract images from trace JSON file."""
+     print(f"Loading trace from {trace_json}")
+
+     with open(trace_json) as f:
+         trace = json.load(f)
+
+     # Handle trace wrapped in "session_trace" key
+     if "session_trace" in trace:
+         trace = trace["session_trace"]
+
+     count = extract_images_from_trace_dict(trace, output_dir)
+     print(f"\n✓ Extracted {count} images to {output_dir}/")
+
+
+ def main():
+     parser = argparse.ArgumentParser(description=__doc__)
+     parser.add_argument(
+         "--trace-db",
+         help="Path to trace database",
+     )
+     parser.add_argument(
+         "--trace-json",
+         type=Path,
+         help="Path to trace JSON file",
+     )
+     parser.add_argument(
+         "--output-dir",
+         default="examples/blog_posts/pokemon_vl/images_gpt5",
+         help="Output directory for images",
+     )
+     parser.add_argument(
+         "--model-filter",
+         help="Filter traces by model name (optional)",
+     )
+     args = parser.parse_args()
+
+     output_dir = Path(args.output_dir)
+
+     if args.trace_json:
+         extract_images_from_trace_json(args.trace_json, output_dir)
+     elif args.trace_db:
+         extract_images_from_trace_db(args.trace_db, output_dir, args.model_filter)
+     else:
+         parser.error("Must provide either --trace-db or --trace-json")
+
+
+ if __name__ == "__main__":
+     main()
+
+
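
Besides the CLI entry point, the two extractor functions can be driven directly from Python (assuming `extract_images.py` is importable from the working directory):

```python
from pathlib import Path

from extract_images import extract_images_from_trace_db

# Pull frames from gpt-5-nano rollouts; filenames encode the step index, map
# position, and text-box state, e.g. step_003_pos_0_5,6_textbox_False.png
extract_images_from_trace_db(
    "traces/v3/pokemon_vl_gpt5nano.db",
    Path("examples/blog_posts/pokemon_vl/images_gpt5"),
    model_filter="gpt-5-nano",
)
```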