synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (889) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. examples/rl/task_app/math_single_step.py +990 -0
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +63 -0
  461. synth_ai/api/train/builders.py +473 -0
  462. synth_ai/api/train/cli.py +1185 -0
  463. synth_ai/api/train/config_finder.py +246 -0
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +352 -0
  470. synth_ai/api/train/pollers.py +91 -0
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +195 -0
  475. synth_ai/api/train/utils.py +244 -0
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +90 -45
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +166 -114
  554. synth_ai/cli/root.py +143 -112
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +3134 -0
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +745 -416
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +7 -1
  583. synth_ai/demos/demo_task_apps/core.py +75 -37
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/config.toml +55 -110
  591. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  592. synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
  593. synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
  594. synth_ai/demos/math/__init__.py +1 -0
  595. synth_ai/demos/math/_common.py +16 -0
  596. synth_ai/demos/math/app.py +38 -0
  597. synth_ai/demos/math/config.toml +76 -0
  598. synth_ai/demos/math/deploy_modal.py +54 -0
  599. synth_ai/demos/math/modal_task_app.py +703 -0
  600. synth_ai/demos/math/task_app_entry.py +51 -0
  601. synth_ai/environments/environment/core.py +7 -1
  602. synth_ai/environments/examples/bandit/engine.py +12 -5
  603. synth_ai/environments/examples/bandit/environment.py +0 -1
  604. synth_ai/environments/examples/bandit/taskset.py +4 -4
  605. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  606. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  607. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  608. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  609. synth_ai/environments/examples/enron/engine.py +7 -2
  610. synth_ai/environments/examples/enron/environment.py +68 -0
  611. synth_ai/environments/examples/red/engine.py +60 -12
  612. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  613. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  614. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  615. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  616. synth_ai/environments/examples/red/environment.py +86 -0
  617. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  618. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  619. synth_ai/environments/examples/verilog/engine.py +104 -12
  620. synth_ai/environments/examples/wordle/environment.py +0 -1
  621. synth_ai/environments/reproducibility/tree.py +5 -6
  622. synth_ai/environments/service/app.py +11 -12
  623. synth_ai/environments/service/core_routes.py +10 -9
  624. synth_ai/environments/stateful/engine.py +1 -1
  625. synth_ai/environments/tasks/core.py +1 -0
  626. synth_ai/environments/tasks/filters.py +5 -6
  627. synth_ai/environments/tasks/utils.py +4 -5
  628. synth_ai/evals/__init__.py +15 -0
  629. synth_ai/evals/base.py +14 -5
  630. synth_ai/evals/client.py +82 -0
  631. synth_ai/evals/types.py +42 -0
  632. synth_ai/http.py +8 -22
  633. synth_ai/http_client.py +45 -12
  634. synth_ai/inference/__init__.py +0 -2
  635. synth_ai/inference/client.py +21 -7
  636. synth_ai/jobs/client.py +129 -80
  637. synth_ai/judge_schemas.py +127 -0
  638. synth_ai/learning/__init__.py +51 -6
  639. synth_ai/learning/algorithms.py +14 -0
  640. synth_ai/learning/client.py +122 -30
  641. synth_ai/learning/config.py +2 -40
  642. synth_ai/learning/constants.py +0 -2
  643. synth_ai/learning/ft_client.py +4 -56
  644. synth_ai/learning/health.py +14 -8
  645. synth_ai/learning/jobs.py +43 -47
  646. synth_ai/learning/prompt_learning_client.py +276 -0
  647. synth_ai/learning/prompt_learning_types.py +185 -0
  648. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  649. synth_ai/learning/rl/client.py +269 -0
  650. synth_ai/learning/rl/config.py +31 -0
  651. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  652. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  653. synth_ai/learning/rl/secrets.py +13 -0
  654. synth_ai/learning/rl_client.py +2 -253
  655. synth_ai/learning/sft/__init__.py +29 -0
  656. synth_ai/learning/sft/client.py +68 -0
  657. synth_ai/learning/sft/config.py +270 -0
  658. synth_ai/learning/sft/data.py +698 -0
  659. synth_ai/learning/sse.py +25 -26
  660. synth_ai/learning/validators.py +29 -25
  661. synth_ai/mcp/__init__.py +5 -0
  662. synth_ai/mcp/__main__.py +8 -0
  663. synth_ai/mcp/main.py +254 -0
  664. synth_ai/mcp/setup.py +100 -0
  665. synth_ai/modal.py +257 -0
  666. synth_ai/pricing/__init__.py +3 -0
  667. synth_ai/pricing/model_pricing.py +64 -0
  668. synth_ai/session/__init__.py +75 -0
  669. synth_ai/session/client.py +383 -0
  670. synth_ai/session/constants.py +63 -0
  671. synth_ai/session/exceptions.py +105 -0
  672. synth_ai/session/manager.py +139 -0
  673. synth_ai/session/models.py +89 -0
  674. synth_ai/session/query.py +110 -0
  675. synth_ai/spec/__init__.py +46 -0
  676. synth_ai/spec/dataclasses.py +149 -0
  677. synth_ai/spec/loader.py +144 -0
  678. synth_ai/spec/serializer.py +199 -0
  679. synth_ai/spec/validation.py +250 -0
  680. synth_ai/streaming/__init__.py +29 -0
  681. synth_ai/streaming/config.py +94 -0
  682. synth_ai/streaming/handlers.py +589 -0
  683. synth_ai/streaming/streamer.py +320 -0
  684. synth_ai/streaming/types.py +95 -0
  685. synth_ai/task/__init__.py +116 -3
  686. synth_ai/task/apps/__init__.py +132 -0
  687. synth_ai/task/auth.py +165 -0
  688. synth_ai/task/client.py +167 -0
  689. synth_ai/task/config.py +261 -0
  690. synth_ai/task/contracts.py +173 -57
  691. synth_ai/task/datasets.py +108 -0
  692. synth_ai/task/errors.py +50 -0
  693. synth_ai/task/health.py +17 -11
  694. synth_ai/task/inference_api.py +101 -0
  695. synth_ai/task/json.py +111 -0
  696. synth_ai/task/proxy.py +251 -0
  697. synth_ai/task/rubrics/__init__.py +55 -0
  698. synth_ai/task/rubrics/loaders.py +156 -0
  699. synth_ai/task/rubrics/models.py +57 -0
  700. synth_ai/task/rubrics/scoring.py +116 -0
  701. synth_ai/task/rubrics/strict.py +149 -0
  702. synth_ai/task/rubrics.py +219 -0
  703. synth_ai/task/server.py +432 -0
  704. synth_ai/task/trace_correlation_helpers.py +328 -0
  705. synth_ai/task/tracing_utils.py +95 -0
  706. synth_ai/task/validators.py +449 -6
  707. synth_ai/task/vendors.py +59 -0
  708. synth_ai/tracing_v3/__init__.py +4 -0
  709. synth_ai/tracing_v3/abstractions.py +21 -4
  710. synth_ai/tracing_v3/config.py +167 -22
  711. synth_ai/tracing_v3/constants.py +21 -0
  712. synth_ai/tracing_v3/db_config.py +42 -29
  713. synth_ai/tracing_v3/decorators.py +80 -45
  714. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  715. synth_ai/tracing_v3/hooks.py +6 -4
  716. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  717. synth_ai/tracing_v3/migration_helper.py +1 -2
  718. synth_ai/tracing_v3/replica_sync.py +12 -7
  719. synth_ai/tracing_v3/serialization.py +130 -0
  720. synth_ai/tracing_v3/session_tracer.py +86 -21
  721. synth_ai/tracing_v3/storage/base.py +98 -12
  722. synth_ai/tracing_v3/storage/config.py +63 -16
  723. synth_ai/tracing_v3/storage/factory.py +11 -9
  724. synth_ai/tracing_v3/storage/utils.py +15 -11
  725. synth_ai/tracing_v3/trace_utils.py +317 -0
  726. synth_ai/tracing_v3/turso/__init__.py +8 -21
  727. synth_ai/tracing_v3/turso/daemon.py +123 -15
  728. synth_ai/tracing_v3/turso/models.py +5 -2
  729. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  730. synth_ai/tracing_v3/utils.py +5 -4
  731. synth_ai/tunnel.py +143 -0
  732. synth_ai/tunnel_deploy.py +278 -0
  733. synth_ai/types.py +8 -0
  734. synth_ai/urls.py +11 -0
  735. synth_ai/utils/__init__.py +166 -0
  736. synth_ai/utils/agents.py +74 -0
  737. synth_ai/utils/apps.py +152 -0
  738. synth_ai/utils/base_url.py +94 -0
  739. synth_ai/utils/bin.py +39 -0
  740. synth_ai/utils/claude.py +36 -0
  741. synth_ai/utils/cli.py +284 -0
  742. synth_ai/utils/config.py +81 -0
  743. synth_ai/utils/env.py +346 -0
  744. synth_ai/utils/errors.py +85 -0
  745. synth_ai/utils/http.py +172 -0
  746. synth_ai/utils/json.py +72 -0
  747. synth_ai/utils/log_filter.py +99 -0
  748. synth_ai/utils/logging.py +198 -0
  749. synth_ai/utils/modal.py +299 -0
  750. synth_ai/utils/paths.py +95 -0
  751. synth_ai/utils/process.py +233 -0
  752. synth_ai/utils/prompts.py +39 -0
  753. synth_ai/utils/sqld.py +122 -0
  754. synth_ai/utils/ssl.py +25 -0
  755. synth_ai/utils/task_app_discovery.py +882 -0
  756. synth_ai/utils/task_app_env.py +186 -0
  757. synth_ai/utils/task_app_state.py +318 -0
  758. synth_ai/utils/tunnel/__init__.py +12 -0
  759. synth_ai/utils/tunnel/config.py +55 -0
  760. synth_ai/utils/user_config.py +137 -0
  761. synth_ai/uvicorn.py +77 -0
  762. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  763. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  764. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  765. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  766. synth_ai/cli/man.py +0 -106
  767. synth_ai/core/experiment.py +0 -15
  768. synth_ai/core/system.py +0 -15
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -63
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/tracing/__init__.py +0 -30
  838. synth_ai/tracing_v1/__init__.py +0 -33
  839. synth_ai/tracing_v3/turso/manager.py +0 -760
  840. synth_ai/v0/tracing/abstractions.py +0 -224
  841. synth_ai/v0/tracing/base_client.py +0 -91
  842. synth_ai/v0/tracing/client_manager.py +0 -131
  843. synth_ai/v0/tracing/config.py +0 -142
  844. synth_ai/v0/tracing/context.py +0 -146
  845. synth_ai/v0/tracing/decorators.py +0 -682
  846. synth_ai/v0/tracing/events/__init__.py +0 -0
  847. synth_ai/v0/tracing/events/manage.py +0 -147
  848. synth_ai/v0/tracing/events/scope.py +0 -86
  849. synth_ai/v0/tracing/events/store.py +0 -228
  850. synth_ai/v0/tracing/immediate_client.py +0 -151
  851. synth_ai/v0/tracing/local.py +0 -18
  852. synth_ai/v0/tracing/log_client_base.py +0 -73
  853. synth_ai/v0/tracing/retry_queue.py +0 -186
  854. synth_ai/v0/tracing/trackers.py +0 -515
  855. synth_ai/v0/tracing/upload.py +0 -512
  856. synth_ai/v0/tracing/utils.py +0 -9
  857. synth_ai/v0/tracing_v1/__init__.py +0 -16
  858. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  859. synth_ai/v0/tracing_v1/base_client.py +0 -91
  860. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  861. synth_ai/v0/tracing_v1/config.py +0 -142
  862. synth_ai/v0/tracing_v1/context.py +0 -146
  863. synth_ai/v0/tracing_v1/decorators.py +0 -703
  864. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  865. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  866. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  867. synth_ai/v0/tracing_v1/events/store.py +0 -228
  868. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  869. synth_ai/v0/tracing_v1/local.py +0 -18
  870. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  871. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  872. synth_ai/v0/tracing_v1/trackers.py +0 -515
  873. synth_ai/v0/tracing_v1/upload.py +0 -527
  874. synth_ai/v0/tracing_v1/utils.py +0 -9
  875. synth_ai/zyk/__init__.py +0 -30
  876. synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
  877. synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
  878. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  879. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  880. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  881. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  882. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  885. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  886. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  887. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  888. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  889. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1226 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import json
5
+ import logging
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ from fastapi import APIRouter, HTTPException
10
+ from pydantic import BaseModel
11
+
12
+ # Import the actual classes from synth-ai
13
+ from synth_ai.environments.examples.crafter_classic.environment import (
14
+ CrafterClassicEnvironment,
15
+ )
16
+ from synth_ai.environments.examples.crafter_classic.taskset import (
17
+ CrafterTaskInstance,
18
+ CrafterTaskInstanceMetadata,
19
+ )
20
+ from synth_ai.environments.tasks.core import Impetus, Intent
21
+
22
+ from .envs.crafter.environment import CrafterEnvironmentWrapper
23
+ from .registry import registry
24
+ from .storage.volume import storage
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ router = APIRouter()
29
+
30
+
31
async def validate_environment_observation(observation: Any, context: str) -> None:
    """Validate the structure of an environment observation.

    Every observation must be a non-``None`` dict. A dict that carries the
    complete set of Wordle keys is additionally type-checked: ``text`` must be
    a ``str`` and ``guesses`` / ``feedback`` must be lists. Any other dict is
    accepted without further checks.

    The coroutine awaits nothing; it stays ``async`` so existing callers can
    keep awaiting it.

    Args:
        observation: The observation to validate.
        context: Label included in log and error messages
            (e.g., "initialize", "step").

    Raises:
        ValueError: If the observation is ``None``, is not a dict, or is a
            Wordle observation whose ``text``, ``guesses`` or ``feedback``
            value has the wrong type.
    """
    if observation is None:
        raise ValueError(f"Environment observation cannot be None in {context}")

    if not isinstance(observation, dict):
        raise ValueError(
            f"Environment observation must be dict in {context}, got {type(observation)}"
        )

    # An observation is treated as Wordle only when ALL of these keys are present.
    wordle_keys = {
        "text",
        "status",
        "remaining_guesses",
        "guesses",
        "feedback",
        "reward_last",
        "total_reward",
        "terminated",
    }
    if not wordle_keys.issubset(observation):
        logger.debug(
            "🔍 ENV_ROUTES: Observation doesn't appear to be Wordle in %s, skipping validation",
            context,
        )
        return

    logger.info("🔍 ENV_ROUTES: Validating Wordle observation structure in %s", context)
    logger.info("🔍 ENV_ROUTES: Observation keys: %s", list(observation.keys()))

    # NOTE: the earlier "missing required keys" check was removed. It could
    # never fire, because reaching this point already proves every Wordle key
    # is present.

    # (key, required type, type name used in the error message), checked in order.
    type_rules = (
        ("text", str, "string"),
        ("guesses", list, "list"),
        ("feedback", list, "list"),
    )
    for key, required_type, type_label in type_rules:
        value = observation.get(key)
        if not isinstance(value, required_type):
            raise ValueError(
                f"Wordle observation '{key}' must be {type_label} in {context}, got {type(value)}"
            )

    logger.info(
        "✅ ENV_ROUTES: Wordle observation structure validated successfully in %s",
        context,
    )
97
+
98
+
99
class EnvCreateRequest(BaseModel):
    """Request body accepted by the ``/create`` endpoint."""

    # Lower-cased by create_environment and compared against "crafter",
    # "wordle" and "sokoban".
    env_name: str
    # Per-environment options: create_environment reads "difficulty" for
    # Crafter and "word_length" / "max_guesses" for Wordle.
    config: dict[str, Any] = {}
    # Forwarded to the environment wrapper and to the registry; the Crafter
    # task metadata falls back to 0 when this is None.
    seed: int | None = None
    # Not referenced in the part of create_environment visible in this review.
    parent_env_id: str | None = None
    # Stored alongside the environment through registry.register_env.
    rl_run_id: str
105
+
106
+
107
class EnvCreateResponse(BaseModel):
    """Response body returned by the ``/create`` endpoint."""

    # Identifier returned by registry.register_env for the new environment.
    env_id: str
    # Initial observation, with "step_idx" / "info" stripped when the wrapper
    # returned a flat result.
    observation: dict[str, Any]
    # Taken from result.get("info"), so it may be None.
    info: dict[str, Any] | None = None
    # Step index reported by the wrapper's initialize() result.
    step_idx: int
112
+
113
+
114
class EnvResetRequest(BaseModel):
    """Request schema carrying an environment id and an optional seed."""

    # NOTE(review): presumably the body of a reset endpoint defined later in
    # this file; confirm against the handler.
    env_id: str
    # Optional; None when the caller supplies no seed.
    seed: int | None = None
117
+
118
+
119
class EnvResetResponse(BaseModel):
    """Response schema carrying an observation, optional info and a step index."""

    # NOTE(review): presumably returned by a reset endpoint defined later in
    # this file; confirm against the handler.
    observation: dict[str, Any]
    # Optional auxiliary data; defaults to None.
    info: dict[str, Any] | None = None
    step_idx: int
123
+
124
+
125
class EnvStepRequest(BaseModel):
    """Request schema carrying an environment id and a list of tool calls."""

    # NOTE(review): presumably the body of a step endpoint defined later in
    # this file; the expected shape of each tool-call dict is not visible here.
    env_id: str
    # Required list; each entry is a free-form dict.
    tool_calls: list[dict[str, Any]]
128
+
129
+
130
class EnvStepResponse(BaseModel):
    """Response schema describing the outcome of one environment step."""

    # NOTE(review): presumably returned by a step endpoint defined later in
    # this file; confirm field semantics against the handler.
    observation: dict[str, Any]
    # Required flag.
    done: bool
    # The three fields below are optional and default to None.
    info: dict[str, Any] | None = None
    reward: float | None = None
    truncated: bool | None = None
    step_idx: int
137
+
138
+
139
class EnvSnapshotRequest(BaseModel):
    """Request schema naming a single environment by id."""

    # NOTE(review): presumably the body of a snapshot endpoint defined later
    # in this file; confirm against the handler.
    env_id: str
141
+
142
+
143
class EnvSnapshotResponse(BaseModel):
    """Response schema describing a stored snapshot."""

    # NOTE(review): presumably returned by a snapshot endpoint defined later
    # in this file. The units of `size` and the meaning of `path` are not
    # visible here; confirm against the storage code.
    snapshot_id: str
    path: str
    rl_run_id: str
    size: int
148
+
149
+
150
class EnvRestoreRequest(BaseModel):
    """Request schema naming a single snapshot by id."""

    # NOTE(review): presumably the body of a restore endpoint defined later
    # in this file; confirm against the handler.
    snapshot_id: str
152
+
153
+
154
class EnvRestoreResponse(BaseModel):
    """Response schema with the same fields as ``EnvCreateResponse``."""

    # NOTE(review): presumably returned by a restore endpoint defined later
    # in this file; confirm against the handler.
    env_id: str
    observation: dict[str, Any]
    # Optional auxiliary data; defaults to None.
    info: dict[str, Any] | None = None
    step_idx: int
159
+
160
+
161
class EnvTerminateRequest(BaseModel):
    """Request schema naming a single environment by id."""

    # NOTE(review): presumably the body of a terminate endpoint defined later
    # in this file; confirm against the handler.
    env_id: str
163
+
164
+
165
class EnvTerminateResponse(BaseModel):
    """Response schema carrying a single boolean flag."""

    # NOTE(review): presumably signals whether termination succeeded; confirm
    # against the handler defined later in this file.
    ok: bool
167
+
168
+
169
+ @router.post("/create", response_model=EnvCreateResponse)
170
+ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
171
+ """Create a new environment instance."""
172
+ try:
173
+ # Create the underlying synth-ai environment
174
+ env_name_lower = request.env_name.lower()
175
+ if env_name_lower == "crafter":
176
+ # Build a minimal Crafter task instance
177
+ difficulty = (request.config or {}).get("difficulty", "normal")
178
+ seed_value = request.seed if request.seed is not None else 0
179
+ # Task object is part of the ecosystem; not required for instantiation here
180
+ impetus = Impetus(instructions="Survive and unlock achievements.")
181
+ intent = Intent(
182
+ rubric={"goal": "Unlock achievements"},
183
+ gold_trajectories=None,
184
+ gold_state_diff={},
185
+ )
186
+ metadata = CrafterTaskInstanceMetadata(
187
+ difficulty=difficulty,
188
+ seed=seed_value,
189
+ num_trees_radius=0,
190
+ num_cows_radius=0,
191
+ num_hostiles_radius=0,
192
+ )
193
+ instance = CrafterTaskInstance(
194
+ id=uuid4(),
195
+ impetus=impetus,
196
+ intent=intent,
197
+ metadata=metadata,
198
+ is_reproducible=True,
199
+ initial_engine_snapshot=None,
200
+ )
201
+ # Create CrafterClassicEnvironment from task instance
202
+ base_env = CrafterClassicEnvironment(task_instance=instance)
203
+
204
+ # Wrap it for our API
205
+ wrapper = CrafterEnvironmentWrapper(
206
+ env=base_env,
207
+ seed=request.seed,
208
+ )
209
+
210
+ # Initialize the environment
211
+ result = await wrapper.initialize()
212
+
213
+ # Log a world signature for sanity: seed + starting public state hash
214
+ try:
215
+ pub_state = base_env.engine._get_public_state_from_env() # type: ignore[attr-defined]
216
+ import hashlib
217
+ import json as _json
218
+
219
+ sig_src = {
220
+ "player_position": list(pub_state.player_position),
221
+ "player_direction": pub_state.player_direction,
222
+ "semantic_map": pub_state.semantic_map,
223
+ "inventory": {k: v for k, v in pub_state.inventory.items() if v},
224
+ }
225
+ sig_str = _json.dumps(sig_src, sort_keys=True)
226
+ sig = hashlib.md5(sig_str.encode("utf-8")).hexdigest()[:12]
227
+ logger.info(
228
+ "Crafter init signature: seed=%s sig=%s pos=%s inv=%s",
229
+ str(seed_value),
230
+ sig,
231
+ list(pub_state.player_position),
232
+ {k: v for k, v in pub_state.inventory.items() if v},
233
+ )
234
+ except Exception as _e:
235
+ pass
236
+
237
+ # Handle the observation structure consistently
238
+ # For Crafter, the result might still have the old nested structure, so we need to handle both
239
+ if isinstance(result, dict) and "observation" in result:
240
+ # Old nested structure - extract the inner observation
241
+ observation_for_registry = result["observation"].copy()
242
+ else:
243
+ # New flat structure - remove non-observation fields
244
+ observation_for_registry = result.copy()
245
+ for key in ["step_idx", "info"]:
246
+ if key in observation_for_registry:
247
+ del observation_for_registry[key]
248
+
249
+ # Register in memory
250
+ env_id = registry.register_env(
251
+ env=wrapper,
252
+ seed=request.seed,
253
+ rl_run_id=request.rl_run_id,
254
+ last_observation=observation_for_registry,
255
+ last_info=result.get("info"),
256
+ )
257
+
258
+ # Update step index in registry
259
+ handle = registry.get_env(env_id)
260
+ if handle:
261
+ handle.step_idx = result["step_idx"]
262
+
263
+ return EnvCreateResponse(
264
+ env_id=env_id,
265
+ observation=observation_for_registry,
266
+ info=result.get("info"),
267
+ step_idx=result["step_idx"],
268
+ )
269
+ elif env_name_lower == "wordle":
270
+ # Defer imports to avoid hard dependency when not used
271
+ try:
272
+ from synth_ai.environments.examples.wordle.environment import (
273
+ WordleEnvironment,
274
+ )
275
+ from synth_ai.environments.examples.wordle.taskset import (
276
+ WordleTaskInstance,
277
+ WordleTaskInstanceMetadata,
278
+ )
279
+ except Exception as e:
280
+ raise HTTPException(
281
+ status_code=500, detail=f"Wordle modules unavailable: {e}"
282
+ ) from e
283
+
284
+ # Lazy import of wrapper within branch
285
+ try:
286
+ from .envs.wordle.environment import WordleEnvironmentWrapper
287
+ except Exception as e:
288
+ raise HTTPException(
289
+ status_code=500, detail=f"Wordle wrapper unavailable: {e}"
290
+ ) from e
291
+ else:
292
+ wordle_wrapper_cls = WordleEnvironmentWrapper
293
+
294
+ cfg = request.config or {}
295
+ word_length = int(cfg.get("word_length", 5))
296
+ max_guesses = int(cfg.get("max_guesses", 6))
297
+
298
+ # Build a single Wordle task instance with proper seed usage
299
+ md = WordleTaskInstanceMetadata(
300
+ word_length=word_length,
301
+ max_guesses=max_guesses,
302
+ target_word=None, # Let seed determine the word
303
+ enforce_wordlist=True,
304
+ seed=request.seed,
305
+ consume_invalid_attempts=True,
306
+ )
307
+ instance = WordleTaskInstance(
308
+ id=uuid4(),
309
+ impetus=Impetus(instructions="Play Wordle. Submit one 5-letter word per turn."),
310
+ intent=Intent(rubric="guess the word", gold_trajectories=None, gold_state_diff={}),
311
+ metadata=md,
312
+ is_reproducible=True,
313
+ initial_engine_snapshot=None,
314
+ )
315
+ base_env = WordleEnvironment(task_instance=instance)
316
+
317
+ # Try to preserve the exact puzzle snapshot for reproducibility
318
+ init_snap = getattr(instance, "initial_engine_snapshot", None)
319
+
320
+ wrapper = wordle_wrapper_cls(
321
+ env=base_env,
322
+ seed=request.seed,
323
+ word_length=word_length,
324
+ max_guesses=max_guesses,
325
+ initial_engine_snapshot=init_snap,
326
+ )
327
+
328
+ result = await wrapper.initialize()
329
+
330
+ # Validate Wordle observation structure
331
+ # After our fix, the result is now flat, so we need to extract the observation fields
332
+ # that should be passed to the registry and response
333
+ if isinstance(result, dict) and "observation" in result:
334
+ # Old nested structure - extract the inner observation
335
+ observation_for_registry = result["observation"].copy()
336
+ else:
337
+ # New flat structure - remove non-observation fields
338
+ observation_for_registry = result.copy()
339
+ for key in ["step_idx", "info"]:
340
+ if key in observation_for_registry:
341
+ del observation_for_registry[key]
342
+
343
+ await validate_environment_observation(observation_for_registry, "initialize")
344
+
345
+ env_id = registry.register_env(
346
+ env=wrapper,
347
+ seed=request.seed,
348
+ rl_run_id=request.rl_run_id,
349
+ last_observation=observation_for_registry,
350
+ last_info=result.get("info"),
351
+ )
352
+ handle = registry.get_env(env_id)
353
+ if handle:
354
+ handle.step_idx = result["step_idx"]
355
+ return EnvCreateResponse(
356
+ env_id=env_id,
357
+ observation=observation_for_registry,
358
+ info=result.get("info"),
359
+ step_idx=result["step_idx"],
360
+ )
361
+
362
+ elif env_name_lower == "sokoban":
363
+ try:
364
+ from synth_ai.environments.examples.sokoban.environment import (
365
+ SokobanEnvironment,
366
+ )
367
+ from synth_ai.environments.examples.sokoban.taskset import (
368
+ SokobanTaskInstance,
369
+ SokobanTaskInstanceMetadata,
370
+ )
371
+ except Exception as e:
372
+ raise HTTPException(
373
+ status_code=500, detail=f"Sokoban modules unavailable: {e}"
374
+ ) from e
375
+
376
+ # Lazy import of wrapper within branch
377
+ try:
378
+ from .envs.sokoban.environment import SokobanEnvironmentWrapper
379
+ except Exception as e:
380
+ raise HTTPException(
381
+ status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
382
+ ) from e
383
+
384
+ cfg = request.config or {}
385
+ difficulty = cfg.get("difficulty", "easy")
386
+ initial_state = cfg.get("initial_state") # Optional engine snapshot
387
+
388
+ metadata = SokobanTaskInstanceMetadata(
389
+ difficulty=difficulty,
390
+ )
391
+ instance = SokobanTaskInstance(
392
+ id=uuid4(),
393
+ impetus=Impetus(instructions="Push boxes to targets."),
394
+ intent=Intent(
395
+ rubric={"goal": "Solve the Sokoban puzzle"},
396
+ gold_trajectories=None,
397
+ gold_state_diff={},
398
+ ),
399
+ metadata=metadata,
400
+ is_reproducible=True,
401
+ initial_engine_snapshot=initial_state,
402
+ )
403
+ base_env = SokobanEnvironment(task_instance=instance)
404
+
405
+ wrapper = SokobanEnvironmentWrapper(env=base_env, seed=request.seed, config=cfg)
406
+ result = await wrapper.initialize()
407
+
408
+ # Handle the observation structure consistently for Sokoban
409
+ if isinstance(result, dict) and "observation" in result:
410
+ # Old nested structure - extract the inner observation
411
+ observation_for_registry = result["observation"].copy()
412
+ else:
413
+ # New flat structure - remove non-observation fields
414
+ observation_for_registry = result.copy()
415
+ for key in ["step_idx", "info"]:
416
+ if key in observation_for_registry:
417
+ del observation_for_registry[key]
418
+
419
+ env_id = registry.register_env(
420
+ env=wrapper,
421
+ seed=request.seed,
422
+ rl_run_id=request.rl_run_id,
423
+ last_observation=observation_for_registry,
424
+ last_info=result.get("info"),
425
+ )
426
+ handle = registry.get_env(env_id)
427
+ if handle:
428
+ handle.step_idx = result["step_idx"]
429
+ return EnvCreateResponse(
430
+ env_id=env_id,
431
+ observation=observation_for_registry,
432
+ info=result.get("info"),
433
+ step_idx=result["step_idx"],
434
+ )
435
+
436
+ elif env_name_lower == "math":
437
+ # Single-step math env (GSM8K-style)
438
+ cfg = request.config or {}
439
+ # Lazy import of wrapper within branch
440
+ try:
441
+ from .envs.math.environment import MathEnvironmentWrapper
442
+ except Exception as e:
443
+ raise HTTPException(status_code=500, detail=f"Math wrapper unavailable: {e}") from e
444
+
445
+ wrapper = MathEnvironmentWrapper(
446
+ seed=request.seed,
447
+ problem_id=cfg.get("problem_id"),
448
+ problem_text=cfg.get("problem_text"),
449
+ )
450
+ result = await wrapper.initialize()
451
+
452
+ observation_for_registry = (
453
+ result["observation"].copy()
454
+ if isinstance(result, dict) and "observation" in result
455
+ else result.copy()
456
+ )
457
+ for key in ["step_idx", "info"]:
458
+ if key in observation_for_registry:
459
+ del observation_for_registry[key]
460
+
461
+ env_id = registry.register_env(
462
+ env=wrapper,
463
+ seed=request.seed,
464
+ rl_run_id=request.rl_run_id,
465
+ last_observation=observation_for_registry,
466
+ last_info=result.get("info"),
467
+ )
468
+ handle = registry.get_env(env_id)
469
+ if handle:
470
+ handle.step_idx = result["step_idx"]
471
+ return EnvCreateResponse(
472
+ env_id=env_id,
473
+ observation=observation_for_registry,
474
+ info=result.get("info"),
475
+ step_idx=result["step_idx"],
476
+ )
477
+ else:
478
+ raise HTTPException(
479
+ status_code=422,
480
+ detail=f"Unknown environment name: {request.env_name}",
481
+ )
482
+
483
+ except Exception as e:
484
+ logger.error(f"Failed to create environment: {e}")
485
+ raise HTTPException(status_code=500, detail=str(e)) from e
486
+
487
+
488
+ # --- Compatibility routes for existing eval scripts that expect CrafterClassic paths ---
489
@router.post("/CrafterClassic/initialize", response_model=EnvCreateResponse)
async def compat_initialize(payload: dict) -> EnvCreateResponse:
    """Legacy CrafterClassic initialize route; forwards to ``create_environment``.

    The difficulty is taken from the first of these that is set: a non-empty
    string ``world_config``, ``world_config["difficulty"]``, then
    ``config["difficulty"]``. When none is set it stays ``"normal"``. The
    environment is always created as ``"crafter"`` with ``rl_run_id="eval"``.
    """
    world_config = payload.get("world_config")
    config = payload.get("config")

    level: str = "normal"
    if isinstance(world_config, str) and world_config:
        level = world_config
    elif isinstance(world_config, dict) and world_config.get("difficulty"):
        level = str(world_config.get("difficulty"))
    elif isinstance(config, dict) and config.get("difficulty"):
        level = str(config.get("difficulty"))

    create_request = EnvCreateRequest(
        env_name="crafter",
        config={"difficulty": level},
        seed=payload.get("seed"),
        rl_run_id="eval",
    )
    return await create_environment(create_request)
505
+
506
+
507
@router.post("/CrafterClassic/step", response_model=EnvStepResponse)
async def compat_step(payload: dict) -> EnvStepResponse:
    """Legacy CrafterClassic step route; forwards to ``step_environment``.

    Eval scripts send ``{"action": {"tool_calls": [...]}}``. When ``tool_calls``
    is not a list, ``{"action": {"actions": [...]}}`` is accepted instead and
    each entry is expanded into an ``interact`` tool call.
    """
    action = payload.get("action") or {}
    action_is_dict = isinstance(action, dict)

    calls = action.get("tool_calls") if action_is_dict else None
    if not isinstance(calls, list):
        calls = []
        raw_actions = action.get("actions") if action_is_dict else None
        if isinstance(raw_actions, list) and raw_actions:
            calls.extend(
                {"tool": "interact", "args": {"action": item}} for item in raw_actions
            )

    step_request = EnvStepRequest(env_id=payload.get("env_id"), tool_calls=calls)
    return await step_environment(step_request)
527
+
528
+
529
@router.post("/CrafterClassic/terminate", response_model=EnvTerminateResponse)
async def compat_terminate(payload: dict) -> EnvTerminateResponse:
    """Legacy CrafterClassic terminate route; forwards ``env_id`` to ``terminate_environment``."""
    terminate_request = EnvTerminateRequest(env_id=payload.get("env_id"))
    return await terminate_environment(terminate_request)
534
+
535
+
536
def _reset_wrapper_is_wordle(wrapper: Any) -> bool:
    """Return True when ``wrapper`` is a Wordle wrapper (False if the class cannot be imported)."""
    wordle_wrapper_cls = None
    with contextlib.suppress(Exception):
        from .envs.wordle.environment import WordleEnvironmentWrapper

        wordle_wrapper_cls = WordleEnvironmentWrapper
    return wordle_wrapper_cls is not None and isinstance(wrapper, wordle_wrapper_cls)


def _reset_wrapper_is_sokoban(wrapper: Any) -> bool:
    """Return True when ``wrapper`` is a Sokoban wrapper (False if the class cannot be imported)."""
    sokoban_wrapper_cls = None
    with contextlib.suppress(Exception):
        from .envs.sokoban.environment import SokobanEnvironmentWrapper

        sokoban_wrapper_cls = SokobanEnvironmentWrapper
    return sokoban_wrapper_cls is not None and isinstance(wrapper, sokoban_wrapper_cls)


def _reset_rebuild_crafter(wrapper: Any, handle: Any, seed: Any) -> None:
    """Give a Crafter wrapper a fresh base environment built for ``seed``."""
    try:
        seed_value = int(seed)
        metadata = CrafterTaskInstanceMetadata(
            difficulty="normal",
            seed=seed_value,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        )
        instance = CrafterTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Reset"),
            intent=Intent(
                rubric={"goal": "Reset"},
                gold_trajectories=None,
                gold_state_diff={},
            ),
            metadata=metadata,
            is_reproducible=True,
            initial_engine_snapshot=None,
        )
        wrapper.env = CrafterClassicEnvironment(task_instance=instance)
        wrapper.seed = seed_value
        handle.seed = seed_value
    except Exception:
        # Best effort: if the rebuild fails, still record the requested seed
        # so the wrapper's own initialize() can use it.
        wrapper.seed = seed
        handle.seed = seed


async def _reset_rebuild_wordle(wrapper: Any, handle: Any, seed: Any) -> None:
    """Give a Wordle wrapper a fresh base environment with the same word length and guess limit."""
    try:
        from synth_ai.environments.examples.wordle.environment import (
            WordleEnvironment,
        )
        from synth_ai.environments.examples.wordle.taskset import (
            WordleTaskInstance,
            WordleTaskInstanceMetadata,
            create_wordle_taskset,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle modules unavailable: {e}"
        ) from e

    word_length = int(wrapper.word_length)
    max_guesses = int(wrapper.max_guesses)
    init_snap = getattr(wrapper, "initial_engine_snapshot", None)
    if init_snap is not None:
        # A preserved snapshot is passed straight to the task instance.
        instance = WordleTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Reset"),
            intent=Intent(
                rubric={"goal": "Reset"},
                gold_trajectories=None,
                gold_state_diff={},
            ),
            metadata=WordleTaskInstanceMetadata(
                word_length=word_length,
                max_guesses=max_guesses,
            ),
            is_reproducible=True,
            initial_engine_snapshot=init_snap,
        )
    else:
        ts = await create_wordle_taskset(
            sample_size=1,
            word_length=word_length,
            max_guesses=max_guesses,
        )
        instance = ts.instances[0]

    wrapper.env = WordleEnvironment(task_instance=instance)
    if seed is not None:
        wrapper.seed = int(seed)
        handle.seed = int(seed)


def _reset_rebuild_sokoban(wrapper: Any, handle: Any, seed: Any) -> None:
    """Give a Sokoban wrapper a fresh base environment built from its stored config."""
    try:
        from synth_ai.environments.examples.sokoban.environment import (
            SokobanEnvironment,
        )
        from synth_ai.environments.examples.sokoban.taskset import (
            SokobanTaskInstance,
            SokobanTaskInstanceMetadata,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban modules unavailable: {e}"
        ) from e

    cfg = dict(wrapper.config or {})
    instance = SokobanTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Reset"),
        intent=Intent(rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}),
        metadata=SokobanTaskInstanceMetadata(
            difficulty=cfg.get("difficulty", "easy"),
        ),
        is_reproducible=True,
        initial_engine_snapshot=cfg.get("initial_state"),
    )
    wrapper.env = SokobanEnvironment(task_instance=instance)
    if seed is not None:
        wrapper.seed = int(seed)
        handle.seed = int(seed)


def _reset_log_signature(handle: Any) -> None:
    """Log a short hash of the post-reset public state; never raises."""
    try:
        import hashlib
        import json as _json

        base_env = handle.env.env
        pub_state = base_env.engine._get_public_state_from_env()
        position = list(pub_state.player_position)
        inventory = {k: v for k, v in pub_state.inventory.items() if v}
        sig_src = {
            "player_position": position,
            "player_direction": pub_state.player_direction,
            "semantic_map": pub_state.semantic_map,
            "inventory": inventory,
        }
        sig_str = _json.dumps(sig_src, sort_keys=True)
        sig = hashlib.md5(sig_str.encode("utf-8")).hexdigest()[:12]
        logger.info(
            "Crafter reset signature: seed=%s sig=%s pos=%s inv=%s",
            str(handle.seed),
            sig,
            position,
            inventory,
        )
    except Exception as exc:
        # Purely diagnostic: environments without this public-state shape
        # simply do not get a signature line.
        logger.debug("Reset signature unavailable: %s", exc)


def _reset_observation(result: Any) -> Any:
    """Return the observation from a nested or flat ``initialize()`` result."""
    if isinstance(result, dict) and "observation" in result:
        return result["observation"]
    # Flat structure: everything except the bookkeeping fields is observation.
    observation = result.copy()
    for key in ("step_idx", "info"):
        observation.pop(key, None)
    return observation


@router.post("/reset", response_model=EnvResetResponse)
async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
    """Reset an environment to its initial state.

    Crafter wrappers get a new base environment only when a seed is supplied;
    Wordle and Sokoban wrappers are always rebuilt from their stored
    configuration. Any other wrapper is re-initialised as it is. The registry
    handle is then updated from the result of ``wrapper.initialize()``.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            rebuilding or initialising the environment fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")

    try:
        wrapper = handle.env
        if isinstance(wrapper, CrafterEnvironmentWrapper):
            if request.seed is not None:
                _reset_rebuild_crafter(wrapper, handle, request.seed)
        elif _reset_wrapper_is_wordle(wrapper):
            await _reset_rebuild_wordle(wrapper, handle, request.seed)
        elif _reset_wrapper_is_sokoban(wrapper):
            _reset_rebuild_sokoban(wrapper, handle, request.seed)

        # Reset the environment regardless of type
        result = await wrapper.initialize()

        _reset_log_signature(handle)

        # Update registry
        observation = _reset_observation(result)
        handle.step_idx = result["step_idx"]
        handle.last_observation = observation
        handle.last_info = result.get("info")

        return EnvResetResponse(
            observation=observation,
            info=result.get("info"),
            step_idx=result["step_idx"],
        )

    except HTTPException:
        # Already a well-formed HTTP error (e.g. modules unavailable).
        raise
    except Exception as e:
        logger.error(f"Failed to reset environment {request.env_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
802
+
803
+
804
def _step_is_wordle_wrapper(wrapper: Any) -> bool:
    """Return True when ``wrapper`` is a Wordle wrapper (False if the class cannot be imported)."""
    wordle_wrapper_cls = None
    with contextlib.suppress(Exception):
        from .envs.wordle.environment import WordleEnvironmentWrapper

        wordle_wrapper_cls = WordleEnvironmentWrapper
    return wordle_wrapper_cls is not None and isinstance(wrapper, wordle_wrapper_cls)


def _step_normalize_wordle_calls(
    tool_calls: Any, expected_len: int, *, keep_tool_name: bool
) -> list[dict[str, Any]]:
    """Rewrite Wordle tool calls so malformed guesses become ``invalid_guess`` calls.

    A guess is valid when it is a string that, stripped and lower-cased, is
    alphabetic and exactly ``expected_len`` long. Valid guesses keep the
    caller's tool name when ``keep_tool_name`` is true and use ``"interact"``
    otherwise.
    """
    normalized: list[dict[str, Any]] = []
    for tc in tool_calls or []:
        tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
        args = tc.get("arguments") or tc.get("args") or {}
        if isinstance(args, str):
            # Arguments may arrive as a JSON string; unparsable text means no guess.
            try:
                args = json.loads(args)
            except Exception:
                args = {}
        guess = None
        if isinstance(args, dict):
            guess = args.get("guess") or args.get("word")

        cleaned = guess.strip().lower() if isinstance(guess, str) else None
        if cleaned is not None and cleaned.isalpha() and len(cleaned) == expected_len:
            normalized.append(
                {"tool": tool if keep_tool_name else "interact", "args": {"guess": cleaned}}
            )
        else:
            normalized.append({"tool": "invalid_guess", "args": {"original_guess": guess}})
    return normalized


def _step_record_result(handle: Any, result: Any) -> EnvStepResponse:
    """Store a step result on the registry handle and build the response from it."""
    handle.step_idx = result["step_idx"]

    if isinstance(result, dict) and "observation" in result:
        # Old nested structure - extract the inner observation
        observation_for_registry = result["observation"].copy()
    else:
        # New flat structure - remove non-observation fields
        observation_for_registry = result.copy()
        for key in ("step_idx", "done", "info", "reward", "truncated"):
            observation_for_registry.pop(key, None)

    handle.last_observation = observation_for_registry
    handle.last_info = result.get("info")

    return EnvStepResponse(
        observation=observation_for_registry,
        done=result["done"],
        info=result.get("info"),
        reward=result.get("reward"),
        truncated=result.get("truncated"),
        step_idx=result["step_idx"],
    )


@router.post("/step", response_model=EnvStepResponse)
async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
    """Execute a step in the environment.

    Wordle tool calls are normalised first so that malformed guesses reach the
    wrapper as ``invalid_guess`` calls. If the step fails on a Wordle wrapper,
    it is retried once with every valid guess sent through the ``interact``
    tool.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            the step (and the Wordle retry, if any) fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")

    try:
        wrapper = handle.env
        if _step_is_wordle_wrapper(wrapper):
            expected_len = int(getattr(wrapper, "word_length", 5))
            # Preserve the original tool name (interact or submit) for the environment to handle
            normalized = _step_normalize_wordle_calls(
                request.tool_calls, expected_len, keep_tool_name=True
            )
            result = await wrapper.step(normalized)
        else:
            result = await handle.env.step(request.tool_calls)

        # Validate observation structure for Wordle environments
        base_env = getattr(handle.env, "env", None)
        if base_env and "wordle" in base_env.__class__.__name__.lower():
            # Strip the fields that are reported separately from the observation.
            observation_for_validation = result.copy()
            for key in ("step_idx", "done", "info", "reward", "truncated"):
                observation_for_validation.pop(key, None)
            await validate_environment_observation(observation_for_validation, "step")

        return _step_record_result(handle, result)

    except Exception as e:
        logger.error(f"Failed to step environment {request.env_id}: {e}")
        # Fallback for Wordle: convert invalid guesses into 'invalid_guess' tool calls and retry once
        try:
            wrapper = handle.env
            if _step_is_wordle_wrapper(wrapper):
                expected_len = int(getattr(wrapper, "word_length", 5))
                normalized = _step_normalize_wordle_calls(
                    request.tool_calls, expected_len, keep_tool_name=False
                )
                # Retry with normalized calls, allowing the wrapper to synthesize an observation
                result = await wrapper.step(normalized)
                return _step_record_result(handle, result)
        except Exception as retry_error:
            # The retry is best effort; report the original failure below.
            logger.warning(
                "Wordle fallback step failed for environment %s: %s",
                request.env_id,
                retry_error,
            )

        raise HTTPException(status_code=500, detail=f"{type(e).__name__}: {e}") from e
958
+
959
+
960
@router.post("/snapshot", response_model=EnvSnapshotResponse)
async def snapshot_environment(request: EnvSnapshotRequest) -> EnvSnapshotResponse:
    """Serialize a registered environment, store it, and register the snapshot.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            serialization, storage or registration fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")

    try:
        serialized_state = await handle.env.serialize()

        # Persist the state; storage hands back the new id, its location and size.
        saved = storage.save_snapshot(
            rl_run_id=handle.rl_run_id,
            kind="env",
            state_dict=serialized_state,
            config={"seed": handle.seed},
        )
        snapshot_id, path, size = saved

        registry.register_snapshot(
            kind="env",
            rl_run_id=handle.rl_run_id,
            size=size,
            path=path,
        )

        response = EnvSnapshotResponse(
            snapshot_id=snapshot_id,
            path=path,
            rl_run_id=handle.rl_run_id,
            size=size,
        )
        return response

    except Exception as e:
        logger.error(f"Failed to snapshot environment {request.env_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
997
+
998
+
999
async def _restore_crafter_wrapper(state_dict: Any) -> Any:
    """Rebuild a Crafter wrapper from a snapshot payload."""
    # Recreate classic env from snapshot metadata
    seed_value = state_dict["config"]["seed"]
    metadata = CrafterTaskInstanceMetadata(
        difficulty="normal",
        seed=seed_value,
        num_trees_radius=0,
        num_cows_radius=0,
        num_hostiles_radius=0,
    )
    instance = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Restore"),
        intent=Intent(
            rubric={"goal": "Restore"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    base_env = CrafterClassicEnvironment(task_instance=instance)
    return await CrafterEnvironmentWrapper.deserialize(
        payload=state_dict,
        env=base_env,
    )


async def _restore_wordle_wrapper(state_dict: Any) -> Any:
    """Rebuild a Wordle wrapper from a snapshot payload."""
    try:
        from synth_ai.environments.examples.wordle.environment import (
            WordleEnvironment,
        )
        from synth_ai.environments.examples.wordle.taskset import (
            WordleTaskInstance,
            WordleTaskInstanceMetadata,
            create_wordle_taskset,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle modules unavailable: {e}"
        ) from e

    cfg = state_dict.get("config", {}) or {}
    word_length = int(cfg.get("word_length", 5))
    max_guesses = int(cfg.get("max_guesses", 6))
    init_snap = cfg.get("initial_engine_snapshot")
    if init_snap is not None:
        instance = WordleTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Restore"),
            intent=Intent(
                rubric={"goal": "Restore"},
                gold_trajectories=None,
                gold_state_diff={},
            ),
            metadata=WordleTaskInstanceMetadata(
                word_length=word_length, max_guesses=max_guesses
            ),
            is_reproducible=True,
            initial_engine_snapshot=init_snap,
        )
    else:
        ts = await create_wordle_taskset(
            sample_size=1, word_length=word_length, max_guesses=max_guesses
        )
        instance = ts.instances[0]
    base_env = WordleEnvironment(task_instance=instance)

    # Lazy import of wrapper only when needed
    try:
        from .envs.wordle.environment import WordleEnvironmentWrapper
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle wrapper unavailable: {e}"
        ) from e
    return await WordleEnvironmentWrapper.deserialize(payload=state_dict, env=base_env)


async def _restore_sokoban_wrapper(state_dict: Any) -> Any:
    """Rebuild a Sokoban wrapper from a snapshot payload."""
    try:
        from synth_ai.environments.examples.sokoban.environment import (
            SokobanEnvironment,
        )
        from synth_ai.environments.examples.sokoban.taskset import (
            SokobanTaskInstance,
            SokobanTaskInstanceMetadata,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban modules unavailable: {e}"
        ) from e

    cfg = state_dict.get("config", {}) or {}
    instance = SokobanTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Restore"),
        intent=Intent(
            rubric={"goal": "Restore"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=SokobanTaskInstanceMetadata(difficulty=cfg.get("difficulty", "easy")),
        is_reproducible=True,
        initial_engine_snapshot=cfg.get("initial_state"),
    )
    base_env = SokobanEnvironment(task_instance=instance)

    # Lazy import of wrapper only when needed
    try:
        from .envs.sokoban.environment import SokobanEnvironmentWrapper
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
        ) from e
    return await SokobanEnvironmentWrapper.deserialize(payload=state_dict, env=base_env)


@router.post("/restore", response_model=EnvRestoreResponse)
async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
    """Restore an environment from a snapshot.

    The snapshot payload's ``name`` (default ``"crafter"``) selects which
    wrapper is rebuilt; the restored wrapper is registered as a new
    environment under the snapshot's ``rl_run_id``.

    Raises:
        HTTPException: 404 when the snapshot is not registered; 422 when it is
            not an environment snapshot or names an unknown environment; 500
            when loading or rebuilding fails.
    """
    snapshot = registry.get_snapshot(request.snapshot_id)
    if not snapshot:
        raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")

    if snapshot.kind != "env":
        raise HTTPException(
            status_code=422,
            detail=f"Snapshot {request.snapshot_id} is not an environment snapshot",
        )

    builders = {
        "crafter": _restore_crafter_wrapper,
        "wordle": _restore_wordle_wrapper,
        "sokoban": _restore_sokoban_wrapper,
    }

    try:
        # Load snapshot from volume
        state_dict, _meta = storage.load_snapshot(
            rl_run_id=snapshot.rl_run_id,
            kind="env",
            snapshot_id=request.snapshot_id,
        )

        # Recreate environment
        env_name = state_dict.get("name", "crafter")
        build_wrapper = builders.get(str(env_name).lower())
        if build_wrapper is None:
            raise HTTPException(
                status_code=422,
                detail=f"Unknown environment name in snapshot: {env_name}",
            )
        wrapper = await build_wrapper(state_dict)

        # Register new instance
        env_id = registry.register_env(
            env=wrapper,
            seed=wrapper.seed,
            rl_run_id=snapshot.rl_run_id,
            last_observation=wrapper.last_observation,
            last_info=wrapper.last_info,
        )

        # Update step index
        handle = registry.get_env(env_id)
        if handle:
            handle.step_idx = wrapper.step_idx

        return EnvRestoreResponse(
            env_id=env_id,
            observation=wrapper.last_observation or {},
            info=wrapper.last_info,
            step_idx=wrapper.step_idx,
        )

    except HTTPException:
        # Keep deliberate status codes (e.g. the 422 above) instead of
        # collapsing them into a generic 500.
        raise
    except Exception as e:
        logger.error(f"Failed to restore environment from snapshot {request.snapshot_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
1206
+
1207
+
1208
@router.post("/terminate", response_model=EnvTerminateResponse)
async def terminate_environment(request: EnvTerminateRequest) -> EnvTerminateResponse:
    """Terminate a registered environment and drop it from the registry.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            terminating or removing the environment fails.
    """
    target_id = request.env_id
    handle = registry.get_env(target_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")

    try:
        # Let the environment shut itself down before the registry forgets it.
        await handle.env.terminate()
        registry.remove_env(target_id)
        return EnvTerminateResponse(ok=True)
    except Exception as e:
        logger.error(f"Failed to terminate environment {request.env_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e