PyPI - synth-ai - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (169) hide show

examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
examples/rl/configs/rl_from_base_qwen17.toml +1 -0
examples/swe/task_app/hosted/inference/openai_client.py +0 -34
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/task_app.py +254 -36
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
synth_ai/api/train/builders.py +90 -1
synth_ai/api/train/cli.py +396 -21
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +15 -1
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +29 -0
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +85 -17
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +1 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/eval/core.py +13 -10
synth_ai/cli/commands/filter/core.py +53 -17
synth_ai/cli/commands/help/core.py +0 -1
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/train/judge_schemas.py +1 -0
synth_ai/cli/commands/train/judge_validation.py +1 -0
synth_ai/cli/commands/train/validation.py +0 -57
synth_ai/cli/demo.py +35 -3
synth_ai/cli/deploy/__init__.py +40 -25
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/task_app_deploy.py +1 -1
synth_ai/cli/task_apps.py +53 -53
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/judge_schemas.py +1 -0
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/handlers.py +53 -4
synth_ai/streaming/streamer.py +19 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +44 -8
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +17 -17
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +283 -1
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
synth_ai/cli/commands/deploy/__init__.py +0 -23
synth_ai/cli/commands/deploy/core.py +0 -614
synth_ai/cli/commands/deploy/errors.py +0 -72
synth_ai/cli/commands/deploy/validation.py +0 -11
synth_ai/cli/deploy/core.py +0 -5
synth_ai/cli/deploy/errors.py +0 -23
synth_ai/cli/deploy/validation.py +0 -5
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md ADDED Viewed

@@ -0,0 +1,253 @@
+# Smoke Test Implementation - Complete
+## Summary
+The smoke test now provides **complete visibility into RL training rollouts**, including:
+✅ **Auto-start background services** (sqld, task app)
+✅ **Real OpenAI inference** with gpt-4o-mini
+✅ **Tool call display** - see every action the policy takes
+✅ **Trace validation** - verify v3 trace format
+✅ **Clean output** - all diagnostic noise suppressed
+## Quick Start
+```bash
+cd examples/blog_posts/warming_up_to_rl
+uv run synth-ai smoke --config configs/smoke_test.toml
+```
+**Output shows:**
+- Service startup (sqld, task app)
+- Real-time inference requests
+- **All 10 tool calls with arguments** (e.g., `interact_many({"actions":["move_up","move_up"]})`)
+- Rollout metrics (steps, returns, rewards)
+- Success validation
+## Documentation
+All documentation has been updated for future agents:
+### 1. User Documentation
+- **`SMOKE_TESTING.md`** - How to run smoke tests, what to expect
+- **`configs/smoke_test.toml`** - Well-commented example configuration
+- **`monorepo/docs/cli/smoke.mdx`** - Mintlify CLI documentation
+### 2. Developer Documentation
+- **`ARCHITECTURE.md`** - Internal architecture, troubleshooting guide
+- **`synth_ai/cli/commands/smoke/core.py`** - Extensive inline comments explaining tool call extraction
+### 3. Code Comments
+**Tool Call Extraction (core.py lines 946-997):**
+```python
+# Extract and display tool calls from v3 trace
+#
+# IMPORTANT: Tool calls are extracted from the structured v3 trace format.
+# The trace must be requested with return_trace=True for this to work.
+#
+# Trace structure:
+#   trace.event_history[] - list of events (policy calls, env steps)
+#     ├─ event.call_records[] - LLM calls made during this event
+#        ├─ call_record.output_tool_calls[] - tool calls from LLM response
+#           ├─ tool_call.name - function name (e.g., "interact_many")
+#           └─ tool_call.arguments_json - JSON string of arguments
+```
+## Key Implementation Details
+### Tool Call Display
+**Requirements:**
+1. `return_trace = true` in config (CRITICAL - without this, no tool calls)
+2. v3 trace format (`trace_format="structured"`)
+3. Mock proxy or real inference (direct API calls don't populate traces correctly)
+**Data Flow:**
+```
+1. Rollout request with return_trace=True
+   ↓
+2. Task app makes LLM calls, captures responses
+   ↓
+3. LLM responses include tool_calls
+   ↓
+4. Task app stores call_records in event_history
+   ↓
+5. Smoke command extracts from trace.event_history[].call_records[].output_tool_calls[]
+   ↓
+6. Display: TOOL_CALL[N]: function_name({...args})
+```
+### Diagnostic Suppression
+**Permanently disabled (commented out, not deleted):**
+- `synth_ai/tracing_v3/config.py:21` - `[TRACING_V3_CONFIG_LOADED]`
+- `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - All `[PATCH]` messages
+- `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - All `[PATCH]` messages
+- `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - All `[PATCH]` messages
+**Why commented, not deleted?**
+- Preserves context for debugging
+- Shows what messages existed
+- Easy to re-enable if needed
+### Background Service Management
+**Task App:**
+- Runs from synth-ai root (required for discovery)
+- Uses `nohup` for detachment
+- Output → `nohup_task_app.out`
+- Health check accepts 200 or 400 (400 = server up, auth failing)
+- 120s timeout with progress updates
+**sqld:**
+- Dual ports: 8080 (Hrana WebSocket), 8081 (HTTP)
+- Health check: `GET http://127.0.0.1:8081/health`
+- 30s timeout
+- Auto-cleanup of existing processes
+## Configuration Reference
+### Critical Settings
+```toml
+[smoke]
+# Auto-start services
+task_app_name = "grpo-crafter"  # Task app to serve
+task_app_port = 8765
+task_app_env_file = ".env"      # Required for this app
+sqld_auto_start = true
+# Inference - REAL OpenAI
+model = "gpt-4o-mini"           # Actual model used
+mock_backend = "openai"         # Route through OpenAI API
+use_mock = true                 # Enable mock proxy
+# CRITICAL for tool call display
+return_trace = true             # Must be true!
+```
+### Optional Settings
+All `[smoke]` parameters are optional - CLI args override TOML values:
+```bash
+# Override max steps
+uv run synth-ai smoke --config configs/smoke_test.toml --max-steps 5
+# Use different model
+uv run synth-ai smoke --config configs/smoke_test.toml --model gpt-4o
+# Disable mock (use direct API - won't show tool calls properly)
+uv run synth-ai smoke --config configs/smoke_test.toml --no-mock
+```
+## Troubleshooting
+### No tool calls displayed
+**Symptom:** `⚠ No tool calls found in trace`
+**Solutions:**
+1. Verify `return_trace = true` in config
+2. Check `v3_traces=1/1` in output (should match successes)
+3. Ensure `use_mock = true` or using mock proxy
+4. Check task app logs: `cat /path/to/synth-ai/nohup_task_app.out`
+### Task app exits immediately
+**Symptom:** `0 steps`, process not running
+**Solutions:**
+1. Verify task app name: `synth-ai task-app list`
+2. Check .env file exists at `task_app_env_file` path
+3. Ensure running from correct directory
+4. Manual test: `cd /synth-ai && uvx synth-ai task-app serve grpo-crafter --port 8765 --env-file /path/.env --force`
+### Port conflicts
+**Symptom:** `Address already in use`
+**Solution:** Auto-cleanup should handle this, but manual cleanup:
+```bash
+lsof -ti :8080 | xargs kill -9
+lsof -ti :8081 | xargs kill -9
+lsof -ti :8765 | xargs kill -9
+```
+## Testing
+### Unit Tests
+- `tests/unit/test_train_validation.py::test_rl_config_with_smoke_section` - Validates `[smoke]` section parsing
+- `tests/unit/test_smoke_config.py` - Comprehensive Pydantic validation tests
+### Integration Test
+```bash
+cd examples/blog_posts/warming_up_to_rl
+uv run synth-ai smoke --config configs/smoke_test.toml
+```
+**Expected result:**
+- ✅ Services start successfully
+- ✅ 10 tool calls displayed
+- ✅ `v3_traces=1/1`
+- ✅ `successes=1/1`
+- ✅ `nonzero_returns=1/1`
+## Files Modified
+### Core Implementation
+- `synth_ai/cli/commands/smoke/core.py` - Tool call extraction, auto-start logic
+- `synth_ai/api/train/configs/rl.py` - `SmokeConfig` Pydantic model
+- `synth_ai/api/train/builders.py` - Remove `[smoke]` before sending to trainer
+### Diagnostic Suppression
+- `synth_ai/tracing_v3/config.py` - Commented out `[TRACING_V3_CONFIG_LOADED]`
+- `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - Commented out `[PATCH]`
+- `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - Commented out `[PATCH]`
+- `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - Commented out `[PATCH]`
+### Documentation
+- `examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md` - User guide
+- `examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md` - Developer guide
+- `examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml` - Example config
+- `examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml` - Inline docs
+- `monorepo/docs/cli/smoke.mdx` - Mintlify CLI reference
+### Tests
+- `tests/unit/test_train_validation.py` - Added smoke section test
+- `tests/unit/test_smoke_config.py` - Comprehensive smoke config tests
+## Future Improvements
+Ideas for future agents:
+1. **Streaming display** - Show tool calls as they happen, not just at end
+2. **Tool call validation** - Verify format matches environment expectations
+3. **Performance metrics** - Track inference latency per call
+4. **Cost tracking** - Display OpenAI API costs
+5. **Parallel rollouts** - Support concurrent execution testing
+6. **Vision support** - Save observations for vision-based tasks
+7. **Interactive mode** - Step through rollout one action at a time
+8. **Replay mode** - Re-run saved traces for debugging
+## Success Criteria Met
+✅ **Tool calls visible** - All 10 calls displayed with arguments
+✅ **Real inference** - OpenAI gpt-4o-mini executing actual tool calls
+✅ **Clean output** - No diagnostic noise
+✅ **Auto-start** - Background services managed automatically
+✅ **Well documented** - Comprehensive docs for users and developers
+✅ **Robust** - Error handling, health checks, timeouts
+✅ **Tested** - Unit tests and working integration test
+## Contact
+For questions or issues, see:
+- Architecture details: `ARCHITECTURE.md`
+- User guide: `SMOKE_TESTING.md`
+- CLI reference: `monorepo/docs/cli/smoke.mdx`

examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "Qwen/Qwen3-4B"
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+max_turns = 20
+concurrency = 1
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-4B"
+inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
+temperature = 0.6
+top_p = 0.95
+max_tokens = 2048
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10
+[eval.env_config.env_params]
+max_steps_per_episode = 20

examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml ADDED Viewed

@@ -0,0 +1,26 @@
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+max_turns = 20
+concurrency = 2
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.policy_config]
+provider = "synth"
+model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
+inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
+temperature = 0.2
+top_p = 0.8
+max_tokens = 1024
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10
+tool_choice = "auto"
+[eval.env_config.env_params]
+max_steps_per_episode = 20

examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml CHANGED Viewed

@@ -2,7 +2,7 @@
 # Assumes you stored rollouts in traces/v3/crafter_blog.db via `uvx synth-ai eval`.
 [filter]
-db = "traces/v3/crafter_blog.db"
+db = "sqlite+libsql://http://127.0.0.1:8080"
 output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
 min_official_score = 0.1
 models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]

examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml ADDED Viewed

@@ -0,0 +1,75 @@
+# Example RL config with smoke testing enabled
+# This config demonstrates auto-starting task app and sqld for easy smoke testing
+type = "rl"
+# Smoke testing configuration - AUTO-STARTS services in background!
+[smoke]
+# Auto-start the task app server
+task_app_name = "grpo-crafter"  # Your task app name (use "synth-ai task-app list" to see available apps)
+task_app_port = 8765
+task_app_env_file = ".env"  # Required for this task app
+task_app_force = true  # Kill any existing process on this port
+# Auto-start sqld for tracing
+sqld_auto_start = true
+sqld_db_path = "./traces/local.db"
+sqld_hrana_port = 8080
+sqld_http_port = 8081
+# Test parameters
+env_name = "crafter"
+policy_name = "crafter-react"
+max_steps = 10
+policy = "gpt-5-nano"  # Use gpt-5-nano policy with mock backend
+model = "gpt-4o-mini"  # Real model to use via OpenAI
+mock_backend = "openai"  # Use OpenAI backend for real inference and tool calls
+return_trace = true
+use_mock = true  # Use mock proxy that routes to OpenAI
+# RL Training Configuration (used by actual training, not smoke tests)
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+[policy]
+model_name = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "crafter-rl-demo"
+[compute]
+gpu_type = "H100"
+gpu_count = 2
+[compute.topology]
+type = "single_node_split"
+gpus_for_vllm = 1
+gpus_for_training = 1
+[services]
+task_url = "http://localhost:8765"
+[rollout]
+env_name = "crafter"
+policy_name = "crafter-react"
+max_turns = 10
+episodes_per_batch = 16
+max_concurrent_rollouts = 4
+task_app_origin_rewards_only = true
+[training]
+num_epochs = 1
+iterations_per_epoch = 10
+max_turns = 10
+batch_size = 4
+group_size = 4
+learning_rate = 5e-5
+weight_sync_interval = 1
+log_interval = 1
+[evaluation]
+instances = 2
+every_n_iters = 1
+seeds = [0, 1]

examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml CHANGED Viewed

@@ -4,28 +4,66 @@
 type = "rl"
+# [smoke] section is OPTIONAL and only used by `synth-ai smoke` command for local testing.
+# This section is completely IGNORED by the RL trainer and will not affect training jobs.
+# It allows you to quickly test your task app without passing many CLI arguments:
+#   uvx synth-ai smoke --config this-file.toml
+# All values are optional; CLI args override TOML values.
+[smoke]
+task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
+env_name = "crafter"
+policy_name = "crafter-react"
+max_steps = 10
+policy = "mock"  # mock, gpt-5-nano, openai, groq
+model = "gpt-5-nano"
+mock_backend = "openai"  # synthetic or openai
+mock_port = 0  # 0 = auto-assign
+return_trace = true
+use_mock = true
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
 [services]
-task_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
+judge_url = "https://synth-backend-dev-docker.onrender.com/api"
 [compute]
-gpu_type = "H100"
-gpu_count = 8
+gpu_type = "H200"
+gpu_count = 2
+[compute.topology]
+reference_placement = "none"
 [topology]
-gpus_for_vllm = 4
-gpus_for_training = 3
-gpus_for_ref = 1
+type = "single_node_split"
+reference_placement = "none"
+gpus_for_vllm = 1
+gpus_for_training = 1
+gpus_for_ref = 0
+tensor_parallel = 1
 [vllm]
-tensor_parallel_size = 4
+tensor_parallel_size = 1
+max_model_len = 8192
+[reference]
+placement = "none"
 [model]
-source = "fft:REPLACE-WITH-SFT-JOB-ID"
-label = "crafter-rl-blogpost"
+base = "Qwen/Qwen3-4B"
+trainer_mode = "lora"
+label = "crafter-rl-baseline"
 [rollout]
+env_name = "crafter"
+policy_name = "crafter-react"
 max_turns = 10
-episodes_per_batch = 64
+episodes_per_batch = 20
+max_concurrent_rollouts = 8
+rubric_rewards_only = false
+task_app_origin_rewards_only = true
 [evaluation]
 instances = 100
@@ -33,9 +71,21 @@ every_n_iters = 20
 seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
 [training]
+num_epochs = 1
+iterations_per_epoch = 1
+max_turns = 10
+batch_size = 2
+group_size = 2
+learning_rate = 5e-6
+weight_sync_interval = 1
 log_interval = 1
+max_completion_tokens = 256
+async_semaphore_max = 4
 [training.weight_sync]
 enable = true
 targets = ["policy"]
 weight_sync_interval = 1
+[rubric]
+enabled = false

examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml CHANGED Viewed

@@ -8,7 +8,7 @@ variety = "fft"
 [job]
 model = "Qwen/Qwen3-4B"
-data = "../ft_data/crafter_blog_high_reward.jsonl"
+data = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
 poll_seconds = 1800
 [compute]

examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""Warming Up to RL baseline for Crafter.
+This baseline demonstrates how to evaluate an LLM agent on the Crafter survival game
+without requiring a deployed task app. This is the recommended starting point for coding
+agents to get a baseline score before making changes.
+Quick Start:
+    # Run a quick 3-task baseline
+    uvx synth-ai baseline warming_up_to_rl --split train --seeds 0,1,2
+    # Full train evaluation
+    uvx synth-ai baseline warming_up_to_rl --split train
+    # Compare models
+    uvx synth-ai baseline warming_up_to_rl --model groq:openai/gpt-oss-20b
+"""
+from __future__ import annotations
+import json
+from typing import Any
+try:
+    import crafter
+    CRAFTER_AVAILABLE = True
+except ImportError:
+    CRAFTER_AVAILABLE = False
+from synth_ai.baseline import BaselineConfig, BaselineTaskRunner, DataSplit, TaskResult
+from synth_ai.types import EventReward, OutcomeReward
+class CrafterRunner(BaselineTaskRunner):
+    """Task runner for Crafter environment."""
+    def __init__(self, policy_config: dict[str, Any], env_config: dict[str, Any]):
+        super().__init__(policy_config, env_config)
+        self.max_steps = env_config.get("max_steps", 1000)
+    async def run_task(self, seed: int) -> TaskResult:
+        """Run a single Crafter episode."""
+        if not CRAFTER_AVAILABLE:
+            raise ImportError(
+                "Crafter not installed. Install with: pip install crafter"
+            )
+        # Create environment
+        env = crafter.Env()
+        env.reset()
+        # Initialize tracking
+        event_rewards: list[EventReward] = []
+        achievements = {}
+        step_count = 0
+        # Get model configuration
+        from synth_ai.inference.client import InferenceClient
+        client = InferenceClient()
+        model = self.policy_config.get("model", "gpt-4o-mini")
+        temperature = self.policy_config.get("temperature", 0.7)
+        # Define action tool
+        actions = [
+            "noop", "move_left", "move_right", "move_up", "move_down",
+            "do", "sleep", "place_stone", "place_table", "place_furnace",
+            "place_plant", "make_wood_pickaxe", "make_stone_pickaxe",
+            "make_iron_pickaxe", "make_wood_sword", "make_stone_sword",
+            "make_iron_sword"
+        ]
+        action_tool = {
+            "type": "function",
+            "function": {
+                "name": "take_action",
+                "description": "Take an action in the Crafter world",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "action": {
+                            "type": "string",
+                            "enum": actions,
+                            "description": f"Action to take. Available: {', '.join(actions)}",
+                        }
+                    },
+                    "required": ["action"],
+                },
+            },
+        }
+        # Run episode
+        done = False
+        while not done and step_count < self.max_steps:
+            # Get observation (would include visual state in full implementation)
+            obs_str = f"Crafter Step {step_count}\n"
+            obs_str += f"Current achievements: {achievements}\n"
+            obs_str += "What action should you take to survive and progress?"
+            # Get action from model
+            try:
+                response = await client.generate(
+                    model=model,
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": "You are an expert at survival games. Use the take_action tool to survive and achieve goals in Crafter.",
+                        },
+                        {"role": "user", "content": obs_str},
+                    ],
+                    tools=[action_tool],
+                    temperature=temperature,
+                    max_tokens=100,
+                )
+                # Extract action
+                action_name = "noop"
+                if response.get("tool_calls"):
+                    tool_call = response["tool_calls"][0]
+                    args = json.loads(tool_call["function"]["arguments"])
+                    action_name = args.get("action", "noop")
+                action_idx = actions.index(action_name) if action_name in actions else 0
+                # Take step
+                obs, reward, done, info = env.step(action_idx)
+                # Update achievements
+                if "achievements" in info:
+                    achievements.update(info["achievements"])
+                # Track rewards
+                if reward > 0:
+                    event_rewards.append(
+                        EventReward(
+                            event_id=f"step_{step_count}",
+                            reward=reward,
+                            metadata={"action": action_name, "achievements": achievements.copy()},
+                        )
+                    )
+                step_count += 1
+            except Exception as e:
+                done = True
+                break
+        # Calculate outcome reward based on achievements
+        total_achievements = sum(achievements.values())
+        success = total_achievements >= 3  # At least 3 achievements
+        return TaskResult(
+            success=success,
+            outcome_reward=OutcomeReward(
+                reward=float(total_achievements),
+                metadata={
+                    "steps": step_count,
+                    "achievements": achievements,
+                    "seed": seed,
+                },
+            ),
+            event_rewards=event_rewards,
+            total_steps=step_count,
+            metadata={"achievements": achievements},
+        )
+# Define baseline configuration (only if Crafter is available)
+if CRAFTER_AVAILABLE:
+    warming_up_to_rl_baseline = BaselineConfig(
+        baseline_id="warming_up_to_rl",
+        name="Warming Up to RL - Crafter",
+        description="Crafter survival game baseline for comparing agent performance on RL tasks",
+        task_runner=CrafterRunner,
+        splits={
+            "train": DataSplit(name="train", seeds=list(range(20))),
+            "val": DataSplit(name="val", seeds=list(range(20, 25))),
+            "test": DataSplit(name="test", seeds=list(range(25, 30))),
+        },
+        default_policy_config={
+            "model": "gpt-4o-mini",
+            "temperature": 0.7,
+        },
+        default_env_config={
+            "max_steps": 1000,
+        },
+        tags=["rl", "survival", "achievements", "blog-post"],
+    )

examples/multi_step/configs/VERILOG_REWARDS.md CHANGED Viewed

	@@ -88,3 +88,7 @@ Expected output for successful rollout:
88 88	- `mean_return` ≈ 1.0+ (if full submit success)
89 89
90 90
91	+
92	+
93	+
94	+

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl