synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
12
12
  from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
13
13
  PalletTownProgressionCompositeReward,
14
14
  )
15
- from synth_ai.task.apps import TaskAppEntry, register_task_app
15
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
16
16
  from synth_ai.task.contracts import (
17
17
  RolloutMetrics,
18
18
  RolloutRequest,
@@ -29,6 +29,8 @@ from synth_ai.task.tracing_utils import (
29
29
  tracing_env_enabled,
30
30
  )
31
31
  from synth_ai.tracing_v3.session_tracer import SessionTracer
32
+ from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
33
+ from datetime import datetime, UTC
32
34
 
33
35
  logger = logging.getLogger(__name__)
34
36
 
@@ -260,8 +262,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
260
262
  {
261
263
  "role": "system",
262
264
  "content": (
263
- "You are controlling Pokémon Red. Respond with a single tool call named 'press_button' "
264
- "with JSON arguments {button: 'A|B|UP|DOWN|LEFT|RIGHT|START|SELECT', frames: 1-120}."
265
+ "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
266
+ "Your goal is to make progress in the game. "
267
+ "IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
268
+ "Do not reason about which tool to use - execute_sequence is the only tool available. "
269
+ "Choose appropriate button presses based on what you see in the game screen. "
270
+ "Plan 5-10 actions ahead to play efficiently. "
271
+ "CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
272
+ "Always respond with exactly one tool call containing 5-10 actions."
265
273
  ),
266
274
  },
267
275
  {
@@ -277,7 +285,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
277
285
  "type": "function",
278
286
  "function": {
279
287
  "name": "execute_sequence",
280
- "description": "Execute multiple button presses in sequence. More efficient than separate calls. Recommended: 5-10 actions per call.",
288
+ "description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
281
289
  "parameters": {
282
290
  "type": "object",
283
291
  "properties": {
@@ -300,31 +308,15 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
300
308
  },
301
309
  "required": ["button", "frames"]
302
310
  },
303
- "minItems": 1,
304
- "maxItems": 20,
305
- "description": "Sequence of button presses to execute"
311
+ "minItems": 5,
312
+ "maxItems": 10,
313
+ "description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
306
314
  }
307
315
  },
308
316
  "required": ["actions"],
309
317
  "additionalProperties": False,
310
318
  },
311
319
  },
312
- },
313
- {
314
- "type": "function",
315
- "function": {
316
- "name": "press_button",
317
- "description": "Press a single Game Boy button for N frames (use execute_sequence for multiple actions)",
318
- "parameters": {
319
- "type": "object",
320
- "properties": {
321
- "button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
322
- "frames": {"type": "integer", "minimum": 1, "maximum": 120},
323
- },
324
- "required": ["button"],
325
- "additionalProperties": False,
326
- },
327
- },
328
320
  }
329
321
  ],
330
322
  "tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
@@ -350,35 +342,154 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
350
342
  if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
351
343
  inference_url = inference_url + "/v1/chat/completions"
352
344
 
345
+ # Debug: print exact payload being sent
346
+ import json as _json_debug
347
+ print(f"\n{'='*80}")
348
+ print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
349
+ print(f"{'='*80}")
350
+ print(f"Inference URL: {inference_url}")
351
+ print(f"Payload keys: {list(payload.keys())}")
352
+ print(f"Payload (formatted):")
353
+ print(_json_debug.dumps(payload, indent=2)[:2000])
354
+ print(f"{'='*80}\n")
355
+
356
+
353
357
  if is_external:
354
358
  # External API: use direct HTTP client with auth header
355
359
  headers = {}
360
+ import os
356
361
  if "api.openai.com" in inference_url:
357
- import os
358
362
  api_key = os.getenv("OPENAI_API_KEY")
359
363
  if api_key:
360
364
  headers["Authorization"] = f"Bearer {api_key}"
365
+ elif "modal.run" in inference_url or "synth" in inference_url.lower():
366
+ # Synth API: use SYNTH_API_KEY
367
+ api_key = os.getenv("SYNTH_API_KEY")
368
+ if api_key:
369
+ headers["Authorization"] = f"Bearer {api_key}"
370
+ print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
371
+ # For 30B-A3B models, require H200 (A100 doesn't have enough memory)
372
+ model_id = payload.get("model", "")
373
+ if "30B-A3B" in model_id or "A3B" in model_id:
374
+ headers["X-GPU-Preference"] = "H200"
375
+ print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
361
376
 
362
- async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
377
+ async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client: # 30 min read timeout for cold starts
363
378
  resp = await client.post(inference_url, json=payload, headers=headers)
364
379
  else:
365
380
  # Internal proxy: use local base_url
366
381
  async with httpx.AsyncClient(
367
382
  base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
368
- timeout=httpx.Timeout(60.0)
383
+ timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0) # 30 min read timeout for cold starts
369
384
  ) as client:
370
385
  resp = await client.post(inference_url, json=payload)
371
386
 
372
387
  resp.raise_for_status()
373
388
  data = resp.json()
374
- # Extract first tool call
389
+
390
+ # Record user message (system + user)
391
+ if tracer_instance is not None:
392
+ try:
393
+ print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
394
+ # Record system message
395
+ await tracer_instance.record_message(
396
+ content=messages[0].get("content", ""),
397
+ message_type="system",
398
+ )
399
+ # Record user message
400
+ user_msg_content = messages[1].get("content", "")
401
+ if isinstance(user_msg_content, list):
402
+ # For multimodal content, extract text summary
403
+ text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
404
+ user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
405
+ await tracer_instance.record_message(
406
+ content=user_msg_content,
407
+ message_type="user",
408
+ )
409
+ print(f"[pokemon_red] Recorded user messages", flush=True)
410
+ except Exception as exc:
411
+ logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
412
+ print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
413
+
414
+ # Debug logging for tool calls
415
+ print(f"\n{'='*80}")
416
+ print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
417
+ print(f"{'='*80}")
418
+ print(f"Response status: {resp.status_code}")
419
+ print(f"Response keys: {list(data.keys())}")
375
420
  choices = data.get("choices") or []
421
+ if choices:
422
+ message = choices[0].get("message") or {}
423
+ print(f"Message keys: {list(message.keys())}")
424
+ print(f"Message content preview: {str(message.get('content', ''))[:200]}")
425
+ print(f"Tool calls: {message.get('tool_calls', [])}")
426
+ print(f"Full message (formatted):")
427
+ print(_json_debug.dumps(message, indent=2)[:1500])
428
+ print(f"{'='*80}\n")
429
+
430
+ # Record assistant message/tool calls
431
+ if tracer_instance is not None:
432
+ try:
433
+ message = choices[0].get("message", {}) if choices else {}
434
+ tool_calls = message.get("tool_calls", [])
435
+ content = message.get("content", "")
436
+
437
+ if tool_calls:
438
+ # Record tool calls as assistant message
439
+ import json as _json_record
440
+ await tracer_instance.record_message(
441
+ content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
442
+ message_type="assistant",
443
+ metadata={"is_tool_call": True} if tool_calls else {},
444
+ )
445
+ elif content:
446
+ # Record text content as assistant message
447
+ await tracer_instance.record_message(
448
+ content=content,
449
+ message_type="assistant",
450
+ )
451
+ except Exception as exc:
452
+ logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
453
+
454
+ # Extract first tool call
376
455
  if not choices:
456
+ print("[pokemon_red] WARNING: No choices in inference response")
377
457
  return {}
378
458
  message = choices[0].get("message") or {}
379
459
  raw_calls = message.get("tool_calls") or []
460
+
461
+ # If no structured tool_calls, try parsing XML tool calls from content
462
+ if not raw_calls:
463
+ content = message.get("content", "")
464
+ if content and "<tool_call>" in content:
465
+ import re as _re
466
+ import json as _json_parse
467
+ # Parse XML tool calls: <tool_call>{...}</tool_call>
468
+ xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
469
+ matches = _re.findall(xml_pattern, content, _re.DOTALL)
470
+ if matches:
471
+ print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
472
+ try:
473
+ tool_data = _json_parse.loads(matches[0])
474
+ tool_name = tool_data.get("name", "")
475
+ args = tool_data.get("arguments", {})
476
+
477
+ print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
478
+
479
+ # Handle execute_sequence tool
480
+ if tool_name == "execute_sequence":
481
+ return {"actions": args.get("actions", [])}
482
+
483
+ # Handle press_button tool (legacy single action)
484
+ if tool_name == "press_button":
485
+ return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
486
+ except Exception as parse_err:
487
+ print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
488
+
380
489
  if not raw_calls:
490
+ print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
381
491
  return {}
492
+
382
493
  f = raw_calls[0].get("function") or {}
383
494
  tool_name = f.get("name", "")
384
495
  args = f.get("arguments")
@@ -437,6 +548,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
437
548
  action_context = _build_action_context(prev_state, current_state)
438
549
  step_reward = await reward_fn.score(current_state, action_context)
439
550
 
551
+ # Record environment event
552
+ if tracer_instance is not None:
553
+ try:
554
+ event = EnvironmentEvent(
555
+ system_instance_id="environment:pokemon_red",
556
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
557
+ reward=step_reward,
558
+ terminated=False,
559
+ truncated=False,
560
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
561
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
562
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
563
+ )
564
+ await tracer_instance.record_event(event)
565
+ except Exception as exc:
566
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
567
+
440
568
  sequence_reward += step_reward
441
569
  sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
442
570
 
@@ -488,6 +616,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
488
616
  current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
489
617
  action_context = _build_action_context(prev_state, current_state)
490
618
  step_reward = await reward_fn.score(current_state, action_context)
619
+
620
+ # Record environment event
621
+ if tracer_instance is not None:
622
+ try:
623
+ event = EnvironmentEvent(
624
+ system_instance_id="environment:pokemon_red",
625
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
626
+ reward=step_reward,
627
+ terminated=False,
628
+ truncated=False,
629
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
630
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
631
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
632
+ )
633
+ await tracer_instance.record_event(event)
634
+ except Exception as exc:
635
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
491
636
  total_reward += step_reward
492
637
 
493
638
  # Track reward components if non-zero
@@ -528,6 +673,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
528
673
  # Attempt policy-driven step if policy.config present
529
674
  policy_cfg = request.policy.config or {}
530
675
  if policy_cfg:
676
+ print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
531
677
  try:
532
678
  action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
533
679
 
@@ -546,6 +692,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
546
692
  action_context = _build_action_context(prev_state, current_state)
547
693
  step_reward = await reward_fn.score(current_state, action_context)
548
694
 
695
+ # Record environment event
696
+ if tracer_instance is not None:
697
+ try:
698
+ event = EnvironmentEvent(
699
+ system_instance_id="environment:pokemon_red",
700
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
701
+ reward=step_reward,
702
+ terminated=False,
703
+ truncated=False,
704
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
705
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
706
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
707
+ )
708
+ await tracer_instance.record_event(event)
709
+ except Exception as exc:
710
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
711
+
549
712
  sequence_reward += step_reward
550
713
  sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
551
714
 
@@ -684,23 +847,58 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
684
847
  # End session and get trace
685
848
  session_trace = await tracer_instance.end_session()
686
849
 
687
- # Build trace payload if requested
850
+ # Build trace payload if requested - ALWAYS use full format when return_trace=True
851
+ # This ensures markov_blanket_message_history is always included
688
852
  record_config = getattr(request, 'record', None)
853
+ print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
854
+ if session_trace:
855
+ print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
856
+ print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
857
+ if session_trace.markov_blanket_message_history:
858
+ print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
859
+ else:
860
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
861
+
689
862
  if record_config and getattr(record_config, 'return_trace', False) and session_trace:
690
- trace_payload = {
691
- "session_id": session_trace.session_id,
692
- "created_at": session_trace.created_at.isoformat() if session_trace.created_at else None,
693
- "metadata": dict(session_trace.metadata or {}),
694
- "num_timesteps": session_trace.num_timesteps,
695
- "num_events": session_trace.num_events,
696
- "num_messages": session_trace.num_messages,
697
- }
863
+ # Always return full trace with all messages and events (no compact format)
864
+ import dataclasses
865
+ trace_payload = session_trace.to_dict()
866
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
867
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
868
+
869
+ # Always manually serialize messages and events to ensure they're included
870
+ # to_dict() may not recursively serialize nested dataclasses, so each entry is re-serialized with dataclasses.asdict()
871
+ from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
872
+ if session_trace.markov_blanket_message_history:
873
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
874
+ trace_payload["markov_blanket_message_history"] = [
875
+ dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
876
+ for msg in session_trace.markov_blanket_message_history
877
+ ]
878
+ else:
879
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
880
+ if session_trace.event_history:
881
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
882
+ trace_payload["event_history"] = [
883
+ dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
884
+ for evt in session_trace.event_history
885
+ ]
886
+ else:
887
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
888
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
889
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
890
+ else:
891
+ print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
698
892
  except Exception as exc:
699
893
  logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
894
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
895
+ import traceback
896
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
700
897
 
701
898
  # Fallback trace payload if no tracer but CLI needs it
702
899
  if trace_payload is None:
703
900
  record_config = getattr(request, 'record', None)
901
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
704
902
  if record_config and getattr(record_config, 'return_trace', False):
705
903
  trace_payload = {
706
904
  "session_id": request.run_id,
@@ -718,8 +916,22 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
718
916
  "num_events": len(steps),
719
917
  "num_messages": len(steps) * 2,
720
918
  }
919
+ print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
920
+
921
+ print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
922
+ if trace_payload:
923
+ import json as _json_final
924
+ markov_msgs = trace_payload.get('markov_blanket_message_history', [])
925
+ event_history = trace_payload.get('event_history', [])
926
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
927
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
928
+ if markov_msgs:
929
+ print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
930
+ print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
931
+ else:
932
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
721
933
 
722
- return RolloutResponse(
934
+ response = RolloutResponse(
723
935
  run_id=request.run_id,
724
936
  trajectories=[trajectory],
725
937
  branches={},
@@ -728,6 +940,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
728
940
  ops_executed=len(request.ops or []),
729
941
  trace=trace_payload,
730
942
  )
943
+
944
+ # Final check: inspect what's actually in the response
945
+ if response.trace:
946
+ import json as _json_response
947
+ resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
948
+ print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
949
+
950
+ return response
731
951
 
732
952
 
733
953
  def import_datetime():
@@ -788,11 +1008,40 @@ def build_config() -> TaskAppConfig:
788
1008
  register_task_app(
789
1009
  entry=TaskAppEntry(
790
1010
  app_id="pokemon_red",
791
- description="Pokémon Red demo task app",
1011
+ description="Pokémon Red demo task app with vision support",
792
1012
  config_factory=build_config,
793
1013
  aliases=("pokemon_red_demo",),
794
1014
  env_files=(),
795
- modal=None,
1015
+ modal=ModalDeploymentConfig(
1016
+ app_name="pokemon-red-vision-task-app",
1017
+ python_version="3.11",
1018
+ pip_packages=(
1019
+ "fastapi>=0.100.0",
1020
+ "uvicorn>=0.23.0",
1021
+ "pydantic>=2.0.0",
1022
+ "numpy>=1.24.0",
1023
+ "aiohttp>=3.8.0",
1024
+ "httpx>=0.24.0",
1025
+ "python-dotenv>=1.0.1",
1026
+ # Tracing/DB runtime deps
1027
+ "sqlalchemy>=2.0.42",
1028
+ "aiosqlite>=0.21.0",
1029
+ "greenlet>=3.2.3",
1030
+ # Pokemon Red environment
1031
+ "pyboy>=2.0.0",
1032
+ "pillow>=9.0.0",
1033
+ ),
1034
+ extra_local_dirs=(
1035
+ # Mount repo root so local modules resolve when deployed on Modal
1036
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
1037
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
1038
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
1039
+ ),
1040
+ secret_names=("openai-api-key", "groq-api-key"),
1041
+ memory=16384,
1042
+ cpu=4.0,
1043
+ max_containers=10,
1044
+ ),
796
1045
  )
797
1046
  )
798
1047
 
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
20
20
  cd /path/to/synth-ai
21
21
 
22
22
  # Start the Sokoban task app on port 8911
23
- uvx synth-ai task-app serve sokoban --port 8911
23
+ uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
24
24
  ```
25
25
 
26
26
  The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
283
283
  kill -9 $(lsof -ti :8911)
284
284
 
285
285
  # Restart
286
- uvx synth-ai task-app serve sokoban --port 8911
286
+ uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
287
287
  ```
288
288
 
289
289
  ## Examples
@@ -304,4 +304,3 @@ To add new features:
304
304
  ## License
305
305
 
306
306
  MIT
307
-
@@ -1,24 +1,22 @@
1
1
  # Verilog Eval Config for Groq Qwen3-32B
2
- # Quick eval to test Verilog task app before RL training
3
-
4
- [task_app]
5
- # Update this with your Modal URL after deployment
6
- url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
2
+ # Quick eval to test the Verilog task app before RL training
7
3
 
8
4
  [eval]
9
- num_episodes = 3 # Quick test with 3 seeds
5
+ app_id = "grpo-verilog"
6
+ task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
7
+ model = "groq:qwen3-32b"
10
8
  seeds = [0, 1, 2]
11
- max_steps = 15 # More steps for Verilog compilation chains
9
+ max_turns = 15
10
+ concurrency = 1
11
+ return_trace = true
12
+ trace_format = "structured"
13
+
14
+ [eval.env_config]
15
+ difficulty = "medium"
12
16
 
13
- [policy]
17
+ [eval.policy_config]
14
18
  provider = "groq"
15
19
  model = "qwen/qwen3-32b"
16
20
  temperature = 0.2
17
21
  max_tokens = 768
18
22
  inference_url = "https://api.groq.com/openai/v1/chat/completions"
19
-
20
- [env]
21
- difficulty = "medium" # Can be "easy", "medium", or "hard"
22
-
23
-
24
-
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Verilog task app.
2
2
 
3
3
  This mirrors the Crafter task app wrapper while delegating configuration to
4
- `grpo_verilog.py`. Normal usage should prefer `uvx synth-ai serve grpo-verilog`,
4
+ `grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
5
5
  but the module remains for direct execution or importing the FastAPI app.
6
6
  """
7
7
 
@@ -1,4 +1,7 @@
1
- type = "sft"
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "fft"
2
5
 
3
6
  [job]
4
7
  model = "openai/gpt-4o-mini-2024-07-18"
@@ -1,7 +1,10 @@
1
1
  # Crafter Full Finetune (FFT) example on H100
2
2
  # Adjust paths and hyperparameters to your environment before running.
3
3
 
4
- type = "sft"
4
+ [algorithm]
5
+ type = "offline"
6
+ method = "sft"
7
+ variety = "fft"
5
8
 
6
9
  [job]
7
10
  model = "Qwen/Qwen3-4B" # base model to finetune
@@ -1,7 +1,5 @@
1
1
  # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
2
2
 
3
- type = "sft"
4
-
5
3
  [algorithm]
6
4
  type = "offline"
7
5
  method = "supervised_finetune"
@@ -1,7 +1,5 @@
1
1
  # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
2
2
 
3
- type = "rl"
4
-
5
3
  [algorithm]
6
4
  type = "online"
7
5
  method = "policy_gradient"
@@ -40,6 +38,7 @@ health_interval_ms = 300
40
38
  [model]
41
39
  # Base model start
42
40
  base = "Qwen/Qwen3-4B"
41
+ trainer_mode = "full"
43
42
  label = "crafter-rl-from-base"
44
43
 
45
44
  [rollout]
@@ -50,6 +49,7 @@ policy_name = "crafter-react"
50
49
  max_concurrent_rollouts = 8
51
50
  batches_per_step = 2
52
51
  ops = ["agent", "env"]
52
+ task_app_origin_rewards_only = true
53
53
 
54
54
  [evaluation]
55
55
  # Run baseline evaluation over the first 100 seeds every 20 training iterations
@@ -62,6 +62,7 @@ seeds = [
62
62
  [training]
63
63
  num_epochs = 1
64
64
  iterations_per_epoch = 10
65
+ max_turns = 10
65
66
  batch_size = 16
66
67
  group_size = 4
67
68
  gradient_accumulation_steps = 1
@@ -448,7 +448,7 @@ async def main() -> None:
448
448
 
449
449
  print(f"Ops executed: {ops}")
450
450
  print(
451
- "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai serve …` to persist traces/SFT."
451
+ "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
452
452
  )
453
453
  except httpx.HTTPStatusError as exc:
454
454
  detail = (
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
6
6
 
7
7
  ## Local development
8
8
  ```bash
9
- uvx synth-ai serve grpo-crafter --port 8001
9
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
10
10
  # Optional extras:
11
11
  # --env-file path/to/.env # load additional environment variables
12
12
  # --reload # enable uvicorn auto-reload