synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  # RL training starting from a finetuned model id (TOML-only model selection)
2
2
 
3
+ type = "rl"
4
+
3
5
  [services]
4
6
  # Task app base URL used by the RL job for rollouts
5
7
  # task_url = "https://YOUR-TASK-APP.modal.run"
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
 
6
6
  import argparse
7
7
  import json
8
+ import os
8
9
  import sqlite3
9
10
  import sys
10
11
  from collections import Counter, defaultdict
@@ -12,6 +13,13 @@ from collections.abc import Iterable
12
13
  from pathlib import Path
13
14
  from typing import Any
14
15
 
16
+ from synth_ai._utils.prompts import ensure_required_args
17
+ from synth_ai.tracing_v3.constants import (
18
+ TRACE_DB_BASENAME,
19
+ TRACE_DB_DIR,
20
+ canonical_trace_db_name,
21
+ )
22
+
15
23
  Row = sqlite3.Row
16
24
 
17
25
 
@@ -489,55 +497,81 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
489
497
 
490
498
 
491
499
  def _find_trace_database() -> Path | None:
492
- """Automatically discover the trace database in common locations."""
500
+ """Automatically discover the most recent trace database in common locations."""
493
501
 
494
- # Check for demo directory from state
495
- try:
496
- state_path = Path.home() / ".synth-ai" / "demo.json"
497
- if state_path.exists():
498
- import json
499
-
500
- with state_path.open() as f:
501
- data = json.load(f)
502
- demo_dir = data.get("DEMO_DIR")
503
- if demo_dir:
504
- candidate = Path(demo_dir) / "traces" / "v3" / "synth_ai.db"
505
- if candidate.exists():
506
- return candidate
507
- except Exception:
508
- pass
502
+ candidates: list[Path] = []
509
503
 
510
- # Search upward from current directory
504
+ # Walk up parent directories from CWD
511
505
  cwd = Path.cwd()
512
506
  for parent in [cwd] + list(cwd.parents):
513
- candidate = parent / "traces" / "v3" / "synth_ai.db"
514
- if candidate.exists():
515
- return candidate
516
-
517
- # Check standard locations
518
- standard_locations = [
519
- Path("traces/v3/synth_ai.db"),
520
- Path("../traces/v3/synth_ai.db"),
521
- Path.home() / "synth-ai" / "traces" / "v3" / "synth_ai.db",
522
- ]
523
-
524
- for location in standard_locations:
507
+ candidates.append(parent / "traces" / "v3")
508
+
509
+ # Standard fallback locations
510
+ candidates.extend(
511
+ [
512
+ TRACE_DB_DIR,
513
+ Path("../traces"),
514
+ Path.home() / "synth-ai" / "traces" / "v3",
515
+ ]
516
+ )
517
+
518
+ found: list[Path] = []
519
+ for directory in candidates:
525
520
  try:
526
- if location.exists():
527
- return location.resolve()
521
+ if not directory.exists():
522
+ continue
523
+ for pattern in (
524
+ f"{TRACE_DB_BASENAME}_*.db",
525
+ canonical_trace_db_name(),
526
+ ):
527
+ for candidate in directory.glob(pattern):
528
+ found.append(candidate.resolve())
528
529
  except Exception:
529
530
  continue
530
531
 
531
- return None
532
+ if not found:
533
+ return None
534
+
535
+ found.sort(key=lambda p: p.stat().st_mtime, reverse=True)
536
+ return found[0]
537
+
538
+
539
+ def _discover_local_trace_dbs(root: Path) -> list[Path]:
540
+ """Return trace DBs under *root* (recursively), newest first."""
541
+
542
+ candidates: set[Path] = set()
543
+ ignore_dirs = {".git", ".venv", "__pycache__", "node_modules", "dist", "build"}
544
+ target_exact = canonical_trace_db_name()
545
+
546
+ for dirpath, dirnames, filenames in os.walk(root):
547
+ dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
548
+ for filename in filenames:
549
+ if filename == target_exact or (
550
+ filename.startswith(f"{TRACE_DB_BASENAME}_") and filename.endswith(".db")
551
+ ):
552
+ path = Path(dirpath) / filename
553
+ try:
554
+ candidates.add(path.resolve())
555
+ except Exception:
556
+ continue
557
+
558
+ return sorted(candidates, key=lambda p: p.stat().st_mtime, reverse=True)
532
559
 
533
560
 
534
561
  def main() -> None:
535
562
  parser = argparse.ArgumentParser(description=__doc__)
536
- parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
537
563
  parser.add_argument(
538
- "--output",
564
+ "--in",
565
+ dest="input_path",
566
+ type=Path,
567
+ default=None,
568
+ help="Path to tracing_v3 SQLite DB",
569
+ )
570
+ parser.add_argument(
571
+ "--out",
572
+ dest="output_path",
539
573
  type=Path,
540
- required=False,
574
+ default=None,
541
575
  help="Destination JSONL path for the exported dataset",
542
576
  )
543
577
  parser.add_argument(
@@ -593,25 +627,109 @@ def main() -> None:
593
627
  )
594
628
  args = parser.parse_args()
595
629
 
596
- # Auto-discover database if not specified
597
- db_path = args.db
598
- if db_path is None:
599
- db_path = _find_trace_database()
600
- if db_path:
601
- print(f"Found trace database: {db_path}")
602
- else:
603
- print("\nTrace database configuration:")
604
- db_input = input("Trace database path [traces/v3/synth_ai.db]: ").strip()
605
- db_path = Path(db_input) if db_input else Path("traces/v3/synth_ai.db")
630
+ default_output_path = (Path.cwd() / "ft_data" / "crafter_sft.jsonl").resolve()
631
+
632
+ initial_path: Path | None = None
633
+ if args.input_path is not None:
634
+ initial_path = Path(args.input_path).expanduser().resolve()
635
+ else:
636
+ discovered = _find_trace_database()
637
+ if discovered is not None:
638
+ initial_path = discovered.expanduser().resolve()
639
+ args.input_path = initial_path
640
+
641
+ if args.output_path is None:
642
+ args.output_path = default_output_path
643
+
644
+ local_candidates = _discover_local_trace_dbs(Path.cwd())
645
+ if local_candidates:
646
+ print("\nDiscovered trace databases:")
647
+ for idx, path in enumerate(local_candidates, start=1):
648
+ marker = " <- most recent" if idx == 1 else ""
649
+ print(f" {idx}) {path}{marker}")
650
+ print(" m) Enter path manually")
651
+ print(" 0) Abort")
652
+
653
+ default_index = 1
654
+ if initial_path:
655
+ for idx, candidate in enumerate(local_candidates, start=1):
656
+ if candidate == initial_path:
657
+ default_index = idx
658
+ break
606
659
 
660
+ while True:
661
+ prompt = f"Select database [{default_index}]: "
662
+ choice = input(prompt).strip().lower()
663
+ if not choice:
664
+ args.input_path = local_candidates[default_index - 1]
665
+ break
666
+ if choice == "0":
667
+ raise SystemExit("Aborted by user.")
668
+ if choice in {"m", "manual"}:
669
+ manual = input("Enter trace database path: ").strip()
670
+ if manual:
671
+ args.input_path = Path(manual)
672
+ break
673
+ print("Path required; try again.")
674
+ continue
675
+ try:
676
+ idx = int(choice)
677
+ except ValueError:
678
+ print("Invalid selection; enter a number, 'm', or 0 to abort.")
679
+ continue
680
+ if 1 <= idx <= len(local_candidates):
681
+ args.input_path = local_candidates[idx - 1]
682
+ break
683
+ print(f"Select between 1 and {len(local_candidates)}, 'm', or 0.")
684
+ elif initial_path is not None:
685
+ args.input_path = initial_path
686
+
687
+ # If output wasn't overridden, derive it from the chosen DB name
688
+ if args.output_path == default_output_path and args.input_path:
689
+ db_name = Path(args.input_path).name # e.g., task_app_traces_2025-10-23_13-23-02.db
690
+ timestamp = db_name[:-3] if db_name.endswith(".db") else db_name
691
+ if timestamp.startswith("task_app_traces_"):
692
+ timestamp = timestamp[len("task_app_traces_") :]
693
+ derived_name = f"sft_dataset_{timestamp}.jsonl"
694
+ args.output_path = (Path.cwd() / "ft_data" / derived_name).resolve()
695
+
696
+ input_default = (
697
+ Path(args.input_path).expanduser().resolve()
698
+ if args.input_path is not None
699
+ else (TRACE_DB_DIR / canonical_trace_db_name()).expanduser().resolve()
700
+ )
701
+ output_default = Path(args.output_path).expanduser().resolve() if args.output_path else default_output_path
702
+
703
+ args = ensure_required_args(
704
+ args,
705
+ {
706
+ "input_path": "Trace database path",
707
+ "output_path": "Output JSONL path",
708
+ },
709
+ coerce={
710
+ "input_path": lambda raw: Path(raw).expanduser().resolve(),
711
+ "output_path": lambda raw: Path(raw).expanduser().resolve(),
712
+ },
713
+ defaults={
714
+ "input_path": input_default,
715
+ "output_path": output_default,
716
+ },
717
+ )
718
+
719
+ db_path = Path(args.input_path).expanduser().resolve()
720
+ print(f"Trace database: {db_path}")
607
721
  if not db_path.exists():
608
- print(f"Database not found: {db_path}", file=sys.stderr)
609
- raise SystemExit(1)
722
+ discovered = _find_trace_database()
723
+ if discovered and discovered.exists():
724
+ discovered = discovered.resolve()
725
+ print(f"Discovered trace database: {discovered}")
726
+ db_path = discovered
727
+ else:
728
+ print(f"Database not found: {db_path}", file=sys.stderr)
729
+ raise SystemExit(1)
610
730
 
611
- output_path = args.output
612
- if not output_path:
613
- output_path = Path("ft_data/crafter_traces.jsonl")
614
- print(f"Output will be written to: {output_path.resolve()}")
731
+ output_path = Path(args.output_path).expanduser().resolve()
732
+ print(f"Output dataset: {output_path}")
615
733
 
616
734
  min_unique = args.min_unique
617
735
  if min_unique is None:
@@ -619,15 +737,11 @@ def main() -> None:
619
737
  print(f"Minimum unique achievements filter: {min_unique} (all traces)")
620
738
 
621
739
  # Override args with prompted values
622
- args.db = db_path
623
- args.output = output_path
740
+ args.input_path = db_path
741
+ args.output_path = output_path
624
742
  args.min_unique = min_unique
625
743
 
626
- if not args.db.exists():
627
- print(f"Database not found: {args.db}", file=sys.stderr)
628
- raise SystemExit(1)
629
-
630
- conn = connect(args.db)
744
+ conn = connect(args.input_path)
631
745
  try:
632
746
  (
633
747
  achievements_map,
@@ -708,11 +822,11 @@ def main() -> None:
708
822
  raise SystemExit(1)
709
823
 
710
824
  _validate_dataset(dataset)
711
- write_jsonl(args.output, dataset)
825
+ write_jsonl(args.output_path, dataset)
712
826
  session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
713
827
  session_ids.discard(None)
714
828
  print(
715
- f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output.resolve()}",
829
+ f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output_path.resolve()}",
716
830
  file=sys.stderr,
717
831
  )
718
832
  finally:
@@ -47,8 +47,10 @@ async def run(args: argparse.Namespace) -> None:
47
47
 
48
48
  inference_url = args.inference_url or f"{args.base_url.rstrip('/')}/proxy/groq"
49
49
 
50
+ from synth_ai.task.contracts import RolloutMode
50
51
  request = RolloutRequest(
51
52
  run_id=args.run_id,
53
+ mode=RolloutMode.EVAL,
52
54
  env=RolloutEnvSpec(env_name="crafter", seed=args.seed, config={"seed": args.seed}),
53
55
  policy=RolloutPolicySpec(
54
56
  policy_name="groq-smoke",
@@ -1,179 +1,110 @@
1
1
  # Warming Up to RL (Crafter)
2
2
 
3
- The Crafter example demonstrates the full Synth AI workflow: task app serving, Groq rollouts, tracing, SFT dataset export, FFT training, evaluation of fine-tuned models, and RL training.
4
-
5
- ## Quick Reference Commands
6
-
7
- - Serve task app locally with tracing:
8
- ```bash
9
- uvx synth-ai serve --port 8001 --env-file examples/warming_up_to_rl/.env --trace traces/v3
10
- ```
11
- - Deploy to Modal:
12
- ```bash
13
- uvx synth-ai deploy grpo-crafter --name grpo-crafter-task-app
14
- ```
15
- - Groq rollout (server-side):
16
- ```bash
17
- uv run python examples/warming_up_to_rl/run_eval.py --toml examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml --use-rollout
18
- ```
19
- - Export SFT data from traced runs:
20
- ```bash
21
- python examples/warming_up_to_rl/export_trace_sft.py --db traces/v3/synth_ai.db --output ft_data/crafter_traces.jsonl
22
- ```
23
- - FFT via CLI:
24
- ```bash
25
- uvx synth-ai train --type sft --config examples/warming_up_to_rl/configs/crafter_fft.toml --dataset /absolute/path/to/data.jsonl
26
- ```
27
- - Evaluate FFT checkpoint:
28
- ```bash
29
- uv run python examples/warming_up_to_rl/run_eval.py --toml examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml --use-rollout
30
- ```
31
- - RL via CLI (FFT-first):
32
- ```bash
33
- uvx synth-ai train --type rl --config examples/warming_up_to_rl/configs/rl_from_ft.toml
34
- ```
35
-
36
- ---
3
+ This folder contains an end-to-end Crafter workflow: stand up the task app, collect Groq-powered rollouts, export tracing data for supervised fine-tuning, run FFT/RL jobs, and evaluate checkpoints. Commands assume the repository root as the working directory unless stated otherwise.
37
4
 
38
5
  ## 1. Prerequisites
39
6
 
40
7
  - Python 3.11+
41
- - `uv`/`uvx` available (or install Synth in a virtualenv)
42
- - Modal CLI (`modal token new`) if you plan to deploy the task app
43
- - `.env` in this directory with at least:
44
- - `SYNTH_API_KEY`
45
- - `ENVIRONMENT_API_KEY`
46
- - `TASK_APP_URL` (when running against a hosted task app)
47
- - Optional: `GROQ_API_KEY`, `OPENAI_API_KEY` for proxy endpoints
48
-
49
- `uvx synth-ai setup` can populate the `.env` by guiding you through the dashboard handshake.
8
+ - [`uv`](https://docs.astral.sh/uv/) / `uvx` (or install `synth-ai` inside a virtualenv)
9
+ - Modal CLI (`modal token new`) if you plan to deploy the task app remotely
10
+ - API keys:
11
+ - `SYNTH_API_KEY` and `ENVIRONMENT_API_KEY` are required for CLI flows
12
+ - `GROQ_API_KEY` (used by the Groq policy) and optional `OPENAI_API_KEY`
13
+ - Run `uvx synth-ai setup` once to pair with the Synth dashboard and populate `~/.synth-ai/user_config.json`
50
14
 
51
- > All commands below assume you are running from the repository root unless noted.
15
+ ## 2. Task App
52
16
 
53
- ## 2. Task App Operations
54
-
55
- ### Local development
17
+ ### Local serve (FastAPI)
56
18
 
57
19
  ```bash
58
- uvx synth-ai serve --port 8001 --env-file examples/warming_up_to_rl/.env --trace traces/v3 --trace-db traces/v3/synth_ai.db
20
+ uvx synth-ai serve \
21
+ --env-file examples/warming_up_to_rl/.env \
22
+ --host 127.0.0.1 --port 8001 \
23
+ --trace traces/v3
59
24
  ```
60
25
 
61
- - `--trace` and `--trace-db` enable tracing v3 and SFT JSONL dumps.
62
- - Add `--reload` for uvicorn auto-reload while editing code.
26
+ - `--trace` creates/uses `traces/v3/task_app_traces_<timestamp>.db` for the lifetime of the server. All rollouts append to this file.
27
+ - Add `--trace-db` to override the SQLite path (one DB per server instance).
28
+ - Pass `--reload` during development for auto-reload.
63
29
 
64
30
  ### Modal deploy / serve
65
31
 
66
32
  ```bash
67
- uvx synth-ai deploy grpo-crafter --name grpo-crafter-task-app --env-file examples/warming_up_to_rl/.env
68
- uvx synth-ai modal-serve grpo-crafter --name grpo-crafter-task-app --env-file examples/warming_up_to_rl/.env
33
+ uvx synth-ai deploy grpo-crafter --name grpo-crafter-task-app
34
+ uvx synth-ai modal-serve grpo-crafter --name grpo-crafter-task-app
69
35
  ```
70
36
 
71
- Both commands preflight the environment key with the backend when `SYNTH_API_KEY` is present.
72
-
73
- ## 3. Baseline Evaluations (Groq and Synth vLLM)
74
-
75
- Evaluation scripts auto-load `.env` values. Update TOMLs under `configs/` with the correct `task_app_url` and provider-specific model names.
76
-
77
- - Groq Qwen3-32B:
78
- ```bash
79
- uv run python examples/warming_up_to_rl/run_eval.py --toml examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml --use-rollout
80
- ```
81
- - Synth vLLM Qwen3-4B (Modal-hosted inference URL specified in TOML):
82
- ```bash
83
- uv run python examples/warming_up_to_rl/run_eval.py --toml examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml --use-rollout
84
- ```
85
-
86
- `--use-rollout` drives the task app’s `/rollout` endpoint so achievements and metrics are captured. Without it the script issues per-step `initialize/step/terminate` calls.
87
-
88
- ## 4. Tracing and SFT Dataset Export
89
-
90
- 1. Serve the task app with tracing enabled (see Section 2). Optionally, run the traced rollout helper against the running server:
91
- ```bash
92
- uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
93
- --base-url http://localhost:8001 \
94
- --api-key "$ENVIRONMENT_API_KEY" \
95
- --inference-api-key "$GROQ_API_KEY" \
96
- --model qwen/qwen3-32b \
97
- --inference-url https://api.groq.com/openai \
98
- --max-llm-calls 3 \
99
- --run-id local-trace
100
- ```
101
- 2. Inspect local trace databases:
102
- ```bash
103
- uvx synth-ai traces --limit 10
104
- ```
105
- 3. Export JSONL suitable for SFT:
106
- ```bash
107
- python examples/warming_up_to_rl/export_trace_sft.py \
108
- --db traces/v3/synth_ai.db \
109
- --min-achievements 3 \
110
- --output ft_data/crafter_traces.jsonl
111
- ```
112
-
113
- The exporter enriches each example with achievements unlocked, model metadata, and reward summaries.
114
-
115
- ## 5. SFT / FFT Training
116
-
117
- ### Preferred: `uvx synth-ai train`
37
+ Both commands reuse the same tracing defaults; the backend persists rollouts into the configured SQLite/Turso store.
38
+
39
+ ## 3. Collect rollouts
40
+
41
+ Hit the running task app with the local helper to gather a traced rollout (Groq policy shown below):
118
42
 
119
43
  ```bash
120
- uvx synth-ai train \
121
- --type sft \
122
- --config examples/warming_up_to_rl/configs/crafter_fft.toml \
123
- --dataset /absolute/path/to/crafter_traces.jsonl
44
+ python examples/warming_up_to_rl/run_local_rollout_traced.py \
45
+ --base-url http://localhost:8001 \
46
+ --api-key "$ENVIRONMENT_API_KEY" \
47
+ --inference-api-key "$GROQ_API_KEY" \
48
+ --model qwen/qwen3-32b \
49
+ --inference-url https://api.groq.com/openai \
50
+ --max-llm-calls 3 \
51
+ --run-id local-trace
124
52
  ```
125
53
 
126
- The CLI will:
127
- - Prompt for `.env` selection (or use `--env-file`).
128
- - Upload training (and optional validation) data to `/learning/files`.
129
- - Submit the job and poll until completion unless `--no-poll` is set.
54
+ Artifacts produced per rollout:
55
+ - `traces/v3/task_app_traces_<timestamp>.db`: the task app’s append-only database (one per server lifetime; new rollouts append rows).
56
+ - `local-trace_trace.json`: single-run JSON snapshot for inspection.
130
57
 
131
- ### Legacy script
58
+ ## 4. Export SFT-ready data
132
59
 
133
60
  ```bash
134
- uv run python examples/warming_up_to_rl/run_fft_and_save.py \
135
- --toml examples/warming_up_to_rl/configs/crafter_fft.toml \
136
- --data /absolute/path/to/crafter_traces.jsonl \
137
- --poll-seconds 1800
61
+ python examples/warming_up_to_rl/export_trace_sft.py
138
62
  ```
139
63
 
140
- The script writes the resulting model ID to `ft_model_id.txt`. Use that ID in evaluation and RL configs (e.g., `model = "ft:abc123"`).
64
+ - When run without `--in`, the script lists every `task_app_traces*.db` under the current directory (and subdirectories), sorted by recency, and prompts you to pick one (the newest is marked `← most recent`).
65
+ - The exporter validates the trace data, filters sessions, and writes JSONL to `ft_data/crafter_sft.jsonl` by default (override with `--out`).
141
66
 
142
- ## 6. Evaluate the Fine-tuned Model
67
+ ## 5. FFT / SFT Training
143
68
 
144
- After FFT completes, update `configs/eval_fft_qwen4b.toml` so `model = "ft:<model_id>"`, then rerun the evaluation:
69
+ Recommended via CLI:
145
70
 
146
71
  ```bash
147
- uv run python examples/warming_up_to_rl/run_eval.py --toml examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml --use-rollout
72
+ uvx synth-ai train \
73
+ --type sft \
74
+ --config examples/warming_up_to_rl/configs/crafter_fft.toml \
75
+ --dataset /absolute/path/to/crafter_sft.jsonl
148
76
  ```
149
77
 
150
- This reuses the same Groq/vLLM pipeline but exercises the finetuned checkpoint.
78
+ The CLI uploads training data, submits the job to the Synth backend, and polls for completion. A legacy helper (`run_fft_and_save.py`) is still provided for ad-hoc usage.
151
79
 
152
- ## 7. RL Training
80
+ ## 6. Evaluate checkpoints
153
81
 
154
- ### Preferred: `uvx synth-ai train --type rl`
82
+ Update the relevant TOML with the model identifier (e.g., `model = "ft:<model_id>"`) and run:
155
83
 
156
84
  ```bash
157
- uvx synth-ai train \
158
- --type rl \
159
- --config examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml
85
+ uv run python examples/warming_up_to_rl/run_eval.py \
86
+ --toml examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml \
87
+ --use-rollout
160
88
  ```
161
89
 
162
- During the interactive setup the CLI ensures `SYNTH_API_KEY`, `ENVIRONMENT_API_KEY`, and `TASK_APP_URL` are present, health-checks the task app, and submits the RL job to `/rl/jobs`.
90
+ `--use-rollout` exercises the `/rollout` endpoint so achievements/rewards are surfaced in traces.
163
91
 
164
- ### Legacy script
92
+ ## 7. RL Training
165
93
 
166
94
  ```bash
167
- uv run python examples/warming_up_to_rl/run_rl_and_save.py \
168
- --config examples/warming_up_to_rl/configs/rl_from_ft.toml
95
+ uvx synth-ai train \
96
+ --type rl \
97
+ --config examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml
169
98
  ```
170
99
 
171
- To start directly from a base model, switch the config to `rl_from_base_qwen4b.toml` and ensure `[model].base` is populated.
100
+ Start from `rl_from_ft.toml` if you want to bootstrap from a previously fine-tuned checkpoint.
101
+
102
+ ---
172
103
 
173
- ## 8. Additional Utilities
104
+ ### Notes on tracing
174
105
 
175
- - `manage_secrets.py`: convenience helpers for Modal secret management.
176
- - `run_local_rollout.py`, `run_local_rollout_parallel.py`, `run_rollout_remote.py`: alternative rollout launchers for benchmarking.
177
- - `analyze_trace_db.py`: inspect trace quality/achievements before exporting.
106
+ - **One SQLite DB per server:** every task app instance maintains a single `task_app_traces_<timestamp>.db` and appends each new rollout. If you want a fresh file, start another `synth-ai serve` with a different `--trace-db` path.
107
+ - **JSON snapshots per run:** `run_local_rollout_traced.py` writes `<run_id>_trace.json` so you can inspect or hand-edit individual runs.
108
+ - **Exporter discovery:** the SFT exporter recursively catalogs all `task_app_traces*.db` files beneath the task app directory, allowing you to select any historical snapshot when exporting training data.
178
109
 
179
- Refer to `docs/workflows/` for end-to-end guidance that mirrors these commands.
110
+ These conventions keep tracing predictable: continuous history per server, easy selection of historical DBs, and one-off JSON exports for quick analysis.
@@ -12,7 +12,7 @@ from typing import Any
12
12
 
13
13
  import requests
14
14
  from dotenv import load_dotenv
15
- from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
15
+ from synth_ai._utils.base_url import PROD_BASE_URL_DEFAULT
16
16
 
17
17
 
18
18
  def mask(val: str) -> str:
@@ -42,8 +42,10 @@ def build_rollout_request(
42
42
  trace_format=trace_format,
43
43
  return_trace=return_trace,
44
44
  )
45
+ from synth_ai.task.contracts import RolloutMode
45
46
  return RolloutRequest(
46
47
  run_id=run_id,
48
+ mode=RolloutMode.EVAL,
47
49
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
48
50
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_config),
49
51
  ops=ops,
@@ -33,12 +33,14 @@ def build_rollout_request(
33
33
  "Authorization": f"Bearer {api_key}",
34
34
  },
35
35
  }
36
+ from synth_ai.task.contracts import RolloutMode
36
37
  return RolloutRequest(
37
38
  run_id=run_id,
38
39
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
39
40
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_config),
40
41
  ops=ops,
41
42
  record=RolloutRecordConfig(trajectories=True),
43
+ mode=RolloutMode.EVAL,
42
44
  on_done="reset",
43
45
  safety=RolloutSafetyConfig(),
44
46
  )
@@ -46,12 +46,14 @@ def build_rollout_request(
46
46
  trace_format=trace_format,
47
47
  return_trace=return_trace,
48
48
  )
49
+ from synth_ai.task.contracts import RolloutMode
49
50
  return RolloutRequest(
50
51
  run_id=run_id,
51
52
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
52
53
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_config),
53
54
  ops=ops,
54
55
  record=record_cfg,
56
+ mode=RolloutMode.EVAL,
55
57
  on_done="reset",
56
58
  safety=RolloutSafetyConfig(),
57
59
  )
@@ -53,12 +53,14 @@ def build_rollout_request(
53
53
  trace_format=trace_format,
54
54
  )
55
55
 
56
+ from synth_ai.task.contracts import RolloutMode
56
57
  return RolloutRequest(
57
58
  run_id=run_id,
58
59
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
59
60
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_config),
60
61
  ops=ops,
61
62
  record=record,
63
+ mode=RolloutMode.EVAL,
62
64
  on_done="reset",
63
65
  safety=RolloutSafetyConfig(),
64
66
  )
@@ -10,7 +10,7 @@ from pathlib import Path
10
10
  from typing import Any
11
11
 
12
12
  import requests
13
- from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
13
+ from synth_ai._utils.base_url import PROD_BASE_URL_DEFAULT
14
14
 
15
15
 
16
16
  def _load_toml(path: Path) -> dict[str, Any]:
@@ -60,12 +60,14 @@ def build_request(
60
60
  for _ in range(max(llm_calls, 1)):
61
61
  ops.extend(["agent", "env"])
62
62
 
63
+ from synth_ai.task.contracts import RolloutMode
63
64
  return RolloutRequest(
64
65
  run_id=run_id,
65
66
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
66
67
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_config),
67
68
  ops=ops,
68
69
  record=RolloutRecordConfig(trajectories=True),
70
+ mode=RolloutMode.EVAL,
69
71
  on_done="reset",
70
72
  safety=RolloutSafetyConfig(),
71
73
  )