synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (169) hide show
  1. examples/baseline/banking77_baseline.py +204 -0
  2. examples/baseline/crafter_baseline.py +407 -0
  3. examples/baseline/pokemon_red_baseline.py +326 -0
  4. examples/baseline/simple_baseline.py +56 -0
  5. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  6. examples/blog_posts/gepa/README.md +355 -0
  7. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  9. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  10. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  13. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  15. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  16. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  18. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  19. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  20. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  21. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  22. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  23. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  24. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  25. examples/blog_posts/gepa/task_apps.py +105 -0
  26. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  27. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  28. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  29. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
  30. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
  31. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  32. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  33. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  34. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  35. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  36. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  37. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  38. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  39. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  40. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  41. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  42. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  43. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
  44. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  45. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
  46. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
  47. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  48. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  49. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  50. examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
  51. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
  52. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
  53. examples/rl/configs/rl_from_base_qwen17.toml +1 -0
  54. examples/swe/task_app/hosted/inference/openai_client.py +0 -34
  55. examples/swe/task_app/hosted/policy_routes.py +17 -0
  56. examples/swe/task_app/hosted/rollout.py +4 -2
  57. examples/task_apps/banking77/__init__.py +6 -0
  58. examples/task_apps/banking77/banking77_task_app.py +841 -0
  59. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  60. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  61. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  62. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  63. examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
  64. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  65. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  69. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  70. examples/task_apps/gepa_benchmarks/common.py +260 -0
  71. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  72. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  73. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  74. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  75. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  76. examples/task_apps/pokemon_red/task_app.py +254 -36
  77. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
  78. examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
  85. synth_ai/api/train/builders.py +90 -1
  86. synth_ai/api/train/cli.py +396 -21
  87. synth_ai/api/train/config_finder.py +13 -2
  88. synth_ai/api/train/configs/__init__.py +15 -1
  89. synth_ai/api/train/configs/prompt_learning.py +442 -0
  90. synth_ai/api/train/configs/rl.py +29 -0
  91. synth_ai/api/train/task_app.py +1 -1
  92. synth_ai/api/train/validators.py +277 -0
  93. synth_ai/baseline/__init__.py +25 -0
  94. synth_ai/baseline/config.py +209 -0
  95. synth_ai/baseline/discovery.py +214 -0
  96. synth_ai/baseline/execution.py +146 -0
  97. synth_ai/cli/__init__.py +85 -17
  98. synth_ai/cli/__main__.py +0 -0
  99. synth_ai/cli/claude.py +70 -0
  100. synth_ai/cli/codex.py +84 -0
  101. synth_ai/cli/commands/__init__.py +1 -0
  102. synth_ai/cli/commands/baseline/__init__.py +12 -0
  103. synth_ai/cli/commands/baseline/core.py +637 -0
  104. synth_ai/cli/commands/baseline/list.py +93 -0
  105. synth_ai/cli/commands/eval/core.py +13 -10
  106. synth_ai/cli/commands/filter/core.py +53 -17
  107. synth_ai/cli/commands/help/core.py +0 -1
  108. synth_ai/cli/commands/smoke/__init__.py +7 -0
  109. synth_ai/cli/commands/smoke/core.py +1436 -0
  110. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  111. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  112. synth_ai/cli/commands/train/judge_schemas.py +1 -0
  113. synth_ai/cli/commands/train/judge_validation.py +1 -0
  114. synth_ai/cli/commands/train/validation.py +0 -57
  115. synth_ai/cli/demo.py +35 -3
  116. synth_ai/cli/deploy/__init__.py +40 -25
  117. synth_ai/cli/deploy.py +162 -0
  118. synth_ai/cli/legacy_root_backup.py +14 -8
  119. synth_ai/cli/opencode.py +107 -0
  120. synth_ai/cli/root.py +9 -5
  121. synth_ai/cli/task_app_deploy.py +1 -1
  122. synth_ai/cli/task_apps.py +53 -53
  123. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  124. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  125. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  126. synth_ai/judge_schemas.py +1 -0
  127. synth_ai/learning/__init__.py +10 -0
  128. synth_ai/learning/prompt_learning_client.py +276 -0
  129. synth_ai/learning/prompt_learning_types.py +184 -0
  130. synth_ai/pricing/__init__.py +2 -0
  131. synth_ai/pricing/model_pricing.py +57 -0
  132. synth_ai/streaming/handlers.py +53 -4
  133. synth_ai/streaming/streamer.py +19 -0
  134. synth_ai/task/apps/__init__.py +1 -0
  135. synth_ai/task/config.py +2 -0
  136. synth_ai/task/tracing_utils.py +25 -25
  137. synth_ai/task/validators.py +44 -8
  138. synth_ai/task_app_cfgs.py +21 -0
  139. synth_ai/tracing_v3/config.py +162 -19
  140. synth_ai/tracing_v3/constants.py +1 -1
  141. synth_ai/tracing_v3/db_config.py +24 -38
  142. synth_ai/tracing_v3/storage/config.py +47 -13
  143. synth_ai/tracing_v3/storage/factory.py +3 -3
  144. synth_ai/tracing_v3/turso/daemon.py +113 -11
  145. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  146. synth_ai/types.py +8 -0
  147. synth_ai/urls.py +11 -0
  148. synth_ai/utils/__init__.py +30 -1
  149. synth_ai/utils/agents.py +74 -0
  150. synth_ai/utils/bin.py +39 -0
  151. synth_ai/utils/cli.py +149 -5
  152. synth_ai/utils/env.py +17 -17
  153. synth_ai/utils/json.py +72 -0
  154. synth_ai/utils/modal.py +283 -1
  155. synth_ai/utils/paths.py +48 -0
  156. synth_ai/utils/uvicorn.py +113 -0
  157. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
  158. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
  159. synth_ai/cli/commands/deploy/__init__.py +0 -23
  160. synth_ai/cli/commands/deploy/core.py +0 -614
  161. synth_ai/cli/commands/deploy/errors.py +0 -72
  162. synth_ai/cli/commands/deploy/validation.py +0 -11
  163. synth_ai/cli/deploy/core.py +0 -5
  164. synth_ai/cli/deploy/errors.py +0 -23
  165. synth_ai/cli/deploy/validation.py +0 -5
  166. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  167. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  168. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  169. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ # Smoke Test Implementation - Complete
2
+
3
+ ## Summary
4
+
5
+ The smoke test now provides **complete visibility into RL training rollouts**, including:
6
+
7
+ ✅ **Auto-start background services** (sqld, task app)
8
+ ✅ **Real OpenAI inference** with gpt-4o-mini
9
+ ✅ **Tool call display** - see every action the policy takes
10
+ ✅ **Trace validation** - verify v3 trace format
11
+ ✅ **Clean output** - all diagnostic noise suppressed
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ cd examples/blog_posts/warming_up_to_rl
17
+ uv run synth-ai smoke --config configs/smoke_test.toml
18
+ ```
19
+
20
+ **Output shows:**
21
+ - Service startup (sqld, task app)
22
+ - Real-time inference requests
23
+ - **All 10 tool calls with arguments** (e.g., `interact_many({"actions":["move_up","move_up"]})`)
24
+ - Rollout metrics (steps, returns, rewards)
25
+ - Success validation
26
+
27
+ ## Documentation
28
+
29
+ All documentation has been updated for future agents:
30
+
31
+ ### 1. User Documentation
32
+ - **`SMOKE_TESTING.md`** - How to run smoke tests, what to expect
33
+ - **`configs/smoke_test.toml`** - Well-commented example configuration
34
+ - **`monorepo/docs/cli/smoke.mdx`** - Mintlify CLI documentation
35
+
36
+ ### 2. Developer Documentation
37
+ - **`ARCHITECTURE.md`** - Internal architecture, troubleshooting guide
38
+ - **`synth_ai/cli/commands/smoke/core.py`** - Extensive inline comments explaining tool call extraction
39
+
40
+ ### 3. Code Comments
41
+
42
+ **Tool Call Extraction (core.py lines 946-997):**
43
+ ```python
44
+ # Extract and display tool calls from v3 trace
45
+ #
46
+ # IMPORTANT: Tool calls are extracted from the structured v3 trace format.
47
+ # The trace must be requested with return_trace=True for this to work.
48
+ #
49
+ # Trace structure:
50
+ # trace.event_history[] - list of events (policy calls, env steps)
51
+ # ├─ event.call_records[] - LLM calls made during this event
52
+ # ├─ call_record.output_tool_calls[] - tool calls from LLM response
53
+ # ├─ tool_call.name - function name (e.g., "interact_many")
54
+ # └─ tool_call.arguments_json - JSON string of arguments
55
+ ```
56
+
57
+ ## Key Implementation Details
58
+
59
+ ### Tool Call Display
60
+
61
+ **Requirements:**
62
+ 1. `return_trace = true` in config (CRITICAL - without this, no tool calls)
63
+ 2. v3 trace format (`trace_format="structured"`)
64
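+ 3. Inference routed through the mock proxy (`use_mock = true`), which can forward to a real backend such as OpenAI (`mock_backend = "openai"`); direct API calls (`--no-mock`) don't populate traces correctly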
65
+
66
+ **Data Flow:**
67
+ ```
68
+ 1. Rollout request with return_trace=True
69
+
70
+ 2. Task app makes LLM calls, captures responses
71
+
72
+ 3. LLM responses include tool_calls
73
+
74
+ 4. Task app stores call_records in event_history
75
+
76
+ 5. Smoke command extracts from trace.event_history[].call_records[].output_tool_calls[]
77
+
78
+ 6. Display: TOOL_CALL[N]: function_name({...args})
79
+ ```
80
+
81
+ ### Diagnostic Suppression
82
+
83
+ **Disabled in source (commented out, not deleted):**
84
+ - `synth_ai/tracing_v3/config.py:21` - `[TRACING_V3_CONFIG_LOADED]`
85
+ - `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - All `[PATCH]` messages
86
+ - `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - All `[PATCH]` messages
87
+ - `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - All `[PATCH]` messages
88
+
89
+ **Why commented, not deleted?**
90
+ - Preserves context for debugging
91
+ - Shows what messages existed
92
+ - Easy to re-enable if needed
93
+
94
+ ### Background Service Management
95
+
96
+ **Task App:**
97
+ - Runs from synth-ai root (required for discovery)
98
+ - Uses `nohup` for detachment
99
+ - Output → `nohup_task_app.out`
100
+ - Health check accepts 200 or 400 (400 = server up, auth failing)
101
+ - 120s timeout with progress updates
102
+
103
+ **sqld:**
104
+ - Dual ports: 8080 (Hrana WebSocket), 8081 (HTTP)
105
+ - Health check: `GET http://127.0.0.1:8081/health`
106
+ - 30s timeout
107
+ - Auto-cleanup of existing processes
108
+
109
+ ## Configuration Reference
110
+
111
+ ### Critical Settings
112
+
113
+ ```toml
114
+ [smoke]
115
+ # Auto-start services
116
+ task_app_name = "grpo-crafter" # Task app to serve
117
+ task_app_port = 8765
118
+ task_app_env_file = ".env" # Required for this app
119
+ sqld_auto_start = true
120
+
121
+ # Inference - REAL OpenAI
122
+ model = "gpt-4o-mini" # Actual model used
123
+ mock_backend = "openai" # Route through OpenAI API
124
+ use_mock = true # Enable mock proxy
125
+
126
+ # CRITICAL for tool call display
127
+ return_trace = true # Must be true!
128
+ ```
129
+
130
+ ### Optional Settings
131
+
132
+ All `[smoke]` parameters are optional - CLI args override TOML values:
133
+
134
+ ```bash
135
+ # Override max steps
136
+ uv run synth-ai smoke --config configs/smoke_test.toml --max-steps 5
137
+
138
+ # Use different model
139
+ uv run synth-ai smoke --config configs/smoke_test.toml --model gpt-4o
140
+
141
+ # Disable mock (use direct API - won't show tool calls properly)
142
+ uv run synth-ai smoke --config configs/smoke_test.toml --no-mock
143
+ ```
144
+
145
+ ## Troubleshooting
146
+
147
+ ### No tool calls displayed
148
+
149
+ **Symptom:** `⚠ No tool calls found in trace`
150
+
151
+ **Solutions:**
152
+ 1. Verify `return_trace = true` in config
153
+ 2. Check `v3_traces=1/1` in output (should match successes)
154
+ 3. Ensure `use_mock = true` so that inference goes through the mock proxy
155
+ 4. Check task app logs: `cat /path/to/synth-ai/nohup_task_app.out`
156
+
157
+ ### Task app exits immediately
158
+
159
+ **Symptom:** `0 steps`, process not running
160
+
161
+ **Solutions:**
162
+ 1. Verify task app name: `synth-ai task-app list`
163
+ 2. Check .env file exists at `task_app_env_file` path
164
+ 3. Ensure running from correct directory
165
+ 4. Manual test: `cd /path/to/synth-ai && uvx synth-ai task-app serve grpo-crafter --port 8765 --env-file /path/to/.env --force`
166
+
167
+ ### Port conflicts
168
+
169
+ **Symptom:** `Address already in use`
170
+
171
+ **Solution:** Auto-cleanup should handle this, but manual cleanup:
172
+ ```bash
173
+ lsof -ti :8080 | xargs kill -9
174
+ lsof -ti :8081 | xargs kill -9
175
+ lsof -ti :8765 | xargs kill -9
176
+ ```
177
+
178
+ ## Testing
179
+
180
+ ### Unit Tests
181
+
182
+ - `tests/unit/test_train_validation.py::test_rl_config_with_smoke_section` - Validates `[smoke]` section parsing
183
+ - `tests/unit/test_smoke_config.py` - Comprehensive Pydantic validation tests
184
+
185
+ ### Integration Test
186
+
187
+ ```bash
188
+ cd examples/blog_posts/warming_up_to_rl
189
+ uv run synth-ai smoke --config configs/smoke_test.toml
190
+ ```
191
+
192
+ **Expected result:**
193
+ - ✅ Services start successfully
194
+ - ✅ 10 tool calls displayed
195
+ - ✅ `v3_traces=1/1`
196
+ - ✅ `successes=1/1`
197
+ - ✅ `nonzero_returns=1/1`
198
+
199
+ ## Files Modified
200
+
201
+ ### Core Implementation
202
+ - `synth_ai/cli/commands/smoke/core.py` - Tool call extraction, auto-start logic
203
+ - `synth_ai/api/train/configs/rl.py` - `SmokeConfig` Pydantic model
204
+ - `synth_ai/api/train/builders.py` - Remove `[smoke]` before sending to trainer
205
+
206
+ ### Diagnostic Suppression
207
+ - `synth_ai/tracing_v3/config.py` - Commented out `[TRACING_V3_CONFIG_LOADED]`
208
+ - `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - Commented out `[PATCH]`
209
+ - `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - Commented out `[PATCH]`
210
+ - `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - Commented out `[PATCH]`
211
+
212
+ ### Documentation
213
+ - `examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md` - User guide
214
+ - `examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md` - Developer guide
215
+ - `examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml` - Example config
216
+ - `examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml` - Inline docs
217
+ - `monorepo/docs/cli/smoke.mdx` - Mintlify CLI reference
218
+
219
+ ### Tests
220
+ - `tests/unit/test_train_validation.py` - Added smoke section test
221
+ - `tests/unit/test_smoke_config.py` - Comprehensive smoke config tests
222
+
223
+ ## Future Improvements
224
+
225
+ Ideas for future agents:
226
+
227
+ 1. **Streaming display** - Show tool calls as they happen, not just at end
228
+ 2. **Tool call validation** - Verify format matches environment expectations
229
+ 3. **Performance metrics** - Track inference latency per call
230
+ 4. **Cost tracking** - Display OpenAI API costs
231
+ 5. **Parallel rollouts** - Support concurrent execution testing
232
+ 6. **Vision support** - Save observations for vision-based tasks
233
+ 7. **Interactive mode** - Step through rollout one action at a time
234
+ 8. **Replay mode** - Re-run saved traces for debugging
235
+
236
+ ## Success Criteria Met
237
+
238
+ ✅ **Tool calls visible** - All 10 calls displayed with arguments
239
+ ✅ **Real inference** - OpenAI gpt-4o-mini executing actual tool calls
240
+ ✅ **Clean output** - No diagnostic noise
241
+ ✅ **Auto-start** - Background services managed automatically
242
+ ✅ **Well documented** - Comprehensive docs for users and developers
243
+ ✅ **Robust** - Error handling, health checks, timeouts
244
+ ✅ **Tested** - Unit tests and working integration test
245
+
246
+ ## Contact
247
+
248
+ For questions or issues, see:
249
+ - Architecture details: `ARCHITECTURE.md`
250
+ - User guide: `SMOKE_TESTING.md`
251
+ - CLI reference: `monorepo/docs/cli/smoke.mdx`
252
+
253
+
@@ -0,0 +1,25 @@
1
+ [eval]
2
+ app_id = "grpo-crafter"
3
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
4
+ model = "Qwen/Qwen3-4B"
5
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
6
+ max_turns = 20
7
+ concurrency = 1
8
+ env_name = "crafter"
9
+ policy_name = "crafter-react"
10
+ trace_format = "structured"
11
+ return_trace = true
12
+
13
+ [eval.policy_config]
14
+ provider = "synth"
15
+ model = "Qwen/Qwen3-4B"
16
+ inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
17
+ temperature = 0.6
18
+ top_p = 0.95
19
+ max_tokens = 2048
20
+ use_vision = false
21
+ image_only_mode = false
22
+ max_llm_calls = 10
23
+
24
+ [eval.env_config.env_params]
25
+ max_steps_per_episode = 20
@@ -0,0 +1,26 @@
1
+ [eval]
2
+ app_id = "grpo-crafter"
3
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
4
+ model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
5
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
6
+ max_turns = 20
7
+ concurrency = 2
8
+ env_name = "crafter"
9
+ policy_name = "crafter-react"
10
+ trace_format = "structured"
11
+ return_trace = true
12
+
13
+ [eval.policy_config]
14
+ provider = "synth"
15
+ model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
16
+ inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
17
+ temperature = 0.2
18
+ top_p = 0.8
19
+ max_tokens = 1024
20
+ use_vision = false
21
+ image_only_mode = false
22
+ max_llm_calls = 10
23
+ tool_choice = "auto"
24
+
25
+ [eval.env_config.env_params]
26
+ max_steps_per_episode = 20
@@ -2,7 +2,7 @@
2
2
  # Assumes you stored rollouts in traces/v3/crafter_blog.db via `uvx synth-ai eval`.
3
3
 
4
4
  [filter]
5
- db = "traces/v3/crafter_blog.db"
5
+ db = "sqlite+libsql://http://127.0.0.1:8080"
6
6
  output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
7
7
  min_official_score = 0.1
8
8
  models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]
@@ -0,0 +1,75 @@
1
+ # Example RL config with smoke testing enabled
2
+ # This config demonstrates auto-starting task app and sqld for easy smoke testing
3
+
4
+ type = "rl"
5
+
6
+ # Smoke testing configuration - AUTO-STARTS services in background!
7
+ [smoke]
8
+ # Auto-start the task app server
9
+ task_app_name = "grpo-crafter" # Your task app name (use "synth-ai task-app list" to see available apps)
10
+ task_app_port = 8765
11
+ task_app_env_file = ".env" # Required for this task app
12
+ task_app_force = true # Kill any existing process on this port
13
+
14
+ # Auto-start sqld for tracing
15
+ sqld_auto_start = true
16
+ sqld_db_path = "./traces/local.db"
17
+ sqld_hrana_port = 8080
18
+ sqld_http_port = 8081
19
+
20
+ # Test parameters
21
+ env_name = "crafter"
22
+ policy_name = "crafter-react"
23
+ max_steps = 10
24
+ policy = "gpt-5-nano" # Use gpt-5-nano policy with mock backend
25
+ model = "gpt-4o-mini" # Real model to use via OpenAI
26
+ mock_backend = "openai" # Use OpenAI backend for real inference and tool calls
27
+ return_trace = true
28
+ use_mock = true # Use mock proxy that routes to OpenAI
29
+
30
+ # RL Training Configuration (used by actual training, not smoke tests)
31
+ [algorithm]
32
+ type = "online"
33
+ method = "policy_gradient"
34
+ variety = "gspo"
35
+
36
+ [policy]
37
+ model_name = "Qwen/Qwen3-4B"
38
+ trainer_mode = "full"
39
+ label = "crafter-rl-demo"
40
+
41
+ [compute]
42
+ gpu_type = "H100"
43
+ gpu_count = 2
44
+
45
+ [compute.topology]
46
+ type = "single_node_split"
47
+ gpus_for_vllm = 1
48
+ gpus_for_training = 1
49
+
50
+ [services]
51
+ task_url = "http://localhost:8765"
52
+
53
+ [rollout]
54
+ env_name = "crafter"
55
+ policy_name = "crafter-react"
56
+ max_turns = 10
57
+ episodes_per_batch = 16
58
+ max_concurrent_rollouts = 4
59
+ task_app_origin_rewards_only = true
60
+
61
+ [training]
62
+ num_epochs = 1
63
+ iterations_per_epoch = 10
64
+ max_turns = 10
65
+ batch_size = 4
66
+ group_size = 4
67
+ learning_rate = 5e-5
68
+ weight_sync_interval = 1
69
+ log_interval = 1
70
+
71
+ [evaluation]
72
+ instances = 2
73
+ every_n_iters = 1
74
+ seeds = [0, 1]
75
+
@@ -4,28 +4,66 @@
4
4
 
5
5
  type = "rl"
6
6
 
7
+ # [smoke] section is OPTIONAL and only used by `synth-ai smoke` command for local testing.
8
+ # This section is completely IGNORED by the RL trainer and will not affect training jobs.
9
+ # It allows you to quickly test your task app without passing many CLI arguments:
10
+ # uvx synth-ai smoke --config this-file.toml
11
+ # All values are optional; CLI args override TOML values.
12
+ [smoke]
13
+ task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
14
+ env_name = "crafter"
15
+ policy_name = "crafter-react"
16
+ max_steps = 10
17
+ policy = "mock" # mock, gpt-5-nano, openai, groq
18
+ model = "gpt-5-nano"
19
+ mock_backend = "openai" # synthetic or openai
20
+ mock_port = 0 # 0 = auto-assign
21
+ return_trace = true
22
+ use_mock = true
23
+
24
+ [algorithm]
25
+ type = "online"
26
+ method = "policy_gradient"
27
+ variety = "gspo"
28
+
7
29
  [services]
8
- task_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
30
+ task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
31
+ judge_url = "https://synth-backend-dev-docker.onrender.com/api"
9
32
 
10
33
  [compute]
11
- gpu_type = "H100"
12
- gpu_count = 8
34
+ gpu_type = "H200"
35
+ gpu_count = 2
36
+ [compute.topology]
37
+ reference_placement = "none"
13
38
 
14
39
  [topology]
15
- gpus_for_vllm = 4
16
- gpus_for_training = 3
17
- gpus_for_ref = 1
40
+ type = "single_node_split"
41
+ reference_placement = "none"
42
+ gpus_for_vllm = 1
43
+ gpus_for_training = 1
44
+ gpus_for_ref = 0
45
+ tensor_parallel = 1
18
46
 
19
47
  [vllm]
20
- tensor_parallel_size = 4
48
+ tensor_parallel_size = 1
49
+ max_model_len = 8192
50
+
51
+ [reference]
52
+ placement = "none"
21
53
 
22
54
  [model]
23
- source = "fft:REPLACE-WITH-SFT-JOB-ID"
24
- label = "crafter-rl-blogpost"
55
+ base = "Qwen/Qwen3-4B"
56
+ trainer_mode = "lora"
57
+ label = "crafter-rl-baseline"
25
58
 
26
59
  [rollout]
60
+ env_name = "crafter"
61
+ policy_name = "crafter-react"
27
62
  max_turns = 10
28
- episodes_per_batch = 64
63
+ episodes_per_batch = 20
64
+ max_concurrent_rollouts = 8
65
+ rubric_rewards_only = false
66
+ task_app_origin_rewards_only = true
29
67
 
30
68
  [evaluation]
31
69
  instances = 100
@@ -33,9 +71,21 @@ every_n_iters = 20
33
71
  seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
34
72
 
35
73
  [training]
74
+ num_epochs = 1
75
+ iterations_per_epoch = 1
76
+ max_turns = 10
77
+ batch_size = 2
78
+ group_size = 2
79
+ learning_rate = 5e-6
80
+ weight_sync_interval = 1
36
81
  log_interval = 1
82
+ max_completion_tokens = 256
83
+ async_semaphore_max = 4
37
84
 
38
85
  [training.weight_sync]
39
86
  enable = true
40
87
  targets = ["policy"]
41
88
  weight_sync_interval = 1
89
+
90
+ [rubric]
91
+ enabled = false
@@ -8,7 +8,7 @@ variety = "fft"
8
8
 
9
9
  [job]
10
10
  model = "Qwen/Qwen3-4B"
11
- data = "../ft_data/crafter_blog_high_reward.jsonl"
11
+ data = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
12
12
  poll_seconds = 1800
13
13
 
14
14
  [compute]
@@ -0,0 +1,187 @@
1
+ """Warming Up to RL baseline for Crafter.
2
+
3
+ This baseline demonstrates how to evaluate an LLM agent on the Crafter survival game
4
+ without requiring a deployed task app. This is the recommended starting point for coding
5
+ agents to get a baseline score before making changes.
6
+
7
+ Quick Start:
8
+ # Run a quick 3-task baseline
9
+ uvx synth-ai baseline warming_up_to_rl --split train --seeds 0,1,2
10
+
11
+ # Full train evaluation
12
+ uvx synth-ai baseline warming_up_to_rl --split train
13
+
14
+ # Compare models
15
+ uvx synth-ai baseline warming_up_to_rl --model groq:openai/gpt-oss-20b
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ from typing import Any
22
+
23
+ try:
24
+ import crafter
25
+ CRAFTER_AVAILABLE = True
26
+ except ImportError:
27
+ CRAFTER_AVAILABLE = False
28
+
29
+ from synth_ai.baseline import BaselineConfig, BaselineTaskRunner, DataSplit, TaskResult
30
+ from synth_ai.types import EventReward, OutcomeReward
31
+
32
+
33
+ class CrafterRunner(BaselineTaskRunner):
34
+ """Task runner for Crafter environment."""
35
+
36
+ def __init__(self, policy_config: dict[str, Any], env_config: dict[str, Any]):
37
+ super().__init__(policy_config, env_config)
38
+ self.max_steps = env_config.get("max_steps", 1000)
39
+
40
+ async def run_task(self, seed: int) -> TaskResult:
41
+ """Run a single Crafter episode."""
42
+ if not CRAFTER_AVAILABLE:
43
+ raise ImportError(
44
+ "Crafter not installed. Install with: pip install crafter"
45
+ )
46
+
47
+ # Create environment
48
+ env = crafter.Env()
49
+ env.reset()
50
+
51
+ # Initialize tracking
52
+ event_rewards: list[EventReward] = []
53
+ achievements = {}
54
+ step_count = 0
55
+
56
+ # Get model configuration
57
+ from synth_ai.inference.client import InferenceClient
58
+
59
+ client = InferenceClient()
60
+ model = self.policy_config.get("model", "gpt-4o-mini")
61
+ temperature = self.policy_config.get("temperature", 0.7)
62
+
63
+ # Define action tool
64
+ actions = [
65
+ "noop", "move_left", "move_right", "move_up", "move_down",
66
+ "do", "sleep", "place_stone", "place_table", "place_furnace",
67
+ "place_plant", "make_wood_pickaxe", "make_stone_pickaxe",
68
+ "make_iron_pickaxe", "make_wood_sword", "make_stone_sword",
69
+ "make_iron_sword"
70
+ ]
71
+
72
+ action_tool = {
73
+ "type": "function",
74
+ "function": {
75
+ "name": "take_action",
76
+ "description": "Take an action in the Crafter world",
77
+ "parameters": {
78
+ "type": "object",
79
+ "properties": {
80
+ "action": {
81
+ "type": "string",
82
+ "enum": actions,
83
+ "description": f"Action to take. Available: {', '.join(actions)}",
84
+ }
85
+ },
86
+ "required": ["action"],
87
+ },
88
+ },
89
+ }
90
+
91
+ # Run episode
92
+ done = False
93
+ while not done and step_count < self.max_steps:
94
+ # Get observation (would include visual state in full implementation)
95
+ obs_str = f"Crafter Step {step_count}\n"
96
+ obs_str += f"Current achievements: {achievements}\n"
97
+ obs_str += "What action should you take to survive and progress?"
98
+
99
+ # Get action from model
100
+ try:
101
+ response = await client.generate(
102
+ model=model,
103
+ messages=[
104
+ {
105
+ "role": "system",
106
+ "content": "You are an expert at survival games. Use the take_action tool to survive and achieve goals in Crafter.",
107
+ },
108
+ {"role": "user", "content": obs_str},
109
+ ],
110
+ tools=[action_tool],
111
+ temperature=temperature,
112
+ max_tokens=100,
113
+ )
114
+
115
+ # Extract action
116
+ action_name = "noop"
117
+ if response.get("tool_calls"):
118
+ tool_call = response["tool_calls"][0]
119
+ args = json.loads(tool_call["function"]["arguments"])
120
+ action_name = args.get("action", "noop")
121
+
122
+ action_idx = actions.index(action_name) if action_name in actions else 0
123
+
124
+ # Take step
125
+ obs, reward, done, info = env.step(action_idx)
126
+
127
+ # Update achievements
128
+ if "achievements" in info:
129
+ achievements.update(info["achievements"])
130
+
131
+ # Track rewards
132
+ if reward > 0:
133
+ event_rewards.append(
134
+ EventReward(
135
+ event_id=f"step_{step_count}",
136
+ reward=reward,
137
+ metadata={"action": action_name, "achievements": achievements.copy()},
138
+ )
139
+ )
140
+
141
+ step_count += 1
142
+
143
+ except Exception as e:
144
+ done = True
145
+ break
146
+
147
+ # Calculate outcome reward based on achievements
148
+ total_achievements = sum(achievements.values())
149
+ success = total_achievements >= 3 # At least 3 achievements
150
+
151
+ return TaskResult(
152
+ success=success,
153
+ outcome_reward=OutcomeReward(
154
+ reward=float(total_achievements),
155
+ metadata={
156
+ "steps": step_count,
157
+ "achievements": achievements,
158
+ "seed": seed,
159
+ },
160
+ ),
161
+ event_rewards=event_rewards,
162
+ total_steps=step_count,
163
+ metadata={"achievements": achievements},
164
+ )
165
+
166
+
167
+ # Define baseline configuration (only if Crafter is available)
168
+ if CRAFTER_AVAILABLE:
169
+ warming_up_to_rl_baseline = BaselineConfig(
170
+ baseline_id="warming_up_to_rl",
171
+ name="Warming Up to RL - Crafter",
172
+ description="Crafter survival game baseline for comparing agent performance on RL tasks",
173
+ task_runner=CrafterRunner,
174
+ splits={
175
+ "train": DataSplit(name="train", seeds=list(range(20))),
176
+ "val": DataSplit(name="val", seeds=list(range(20, 25))),
177
+ "test": DataSplit(name="test", seeds=list(range(25, 30))),
178
+ },
179
+ default_policy_config={
180
+ "model": "gpt-4o-mini",
181
+ "temperature": 0.7,
182
+ },
183
+ default_env_config={
184
+ "max_steps": 1000,
185
+ },
186
+ tags=["rl", "survival", "achievements", "blog-post"],
187
+ )
@@ -88,3 +88,7 @@ Expected output for successful rollout:
88
88
  - `mean_return` ≈ 1.0+ (if full submit success)
89
89
 
90
90
 
91
+
92
+
93
+
94
+