synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -3,274 +3,246 @@ from typing import Any, Dict, Set
3
3
  from synth_ai.environments.environment.rewards.core import RewardComponent
4
4
 
5
5
 
6
- class BadgeRewardComponent(RewardComponent):
7
- """Reward for earning gym badges"""
6
+ # ===== COMPREHENSIVE POKEMON RED PROGRESS REWARD SYSTEM =====
7
+ # Designed for deterministic rewards that guide toward beating Brock at Pewter Gym
8
8
 
9
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
10
- prev_badges = action.get("prev_badges", 0)
11
- current_badges = state["badges"]
12
- new_badges = current_badges & ~prev_badges
13
- badge_count = bin(new_badges).count("1")
14
- return badge_count * 1.0
15
9
 
10
+ class RouteExplorationReward(RewardComponent):
11
+ """High rewards for reaching key areas on the path to Pewter Gym - guides exploration"""
16
12
 
17
- class MapTransitionComponent(RewardComponent):
18
- """Reward for exploring new areas"""
13
+ def __init__(self):
14
+ self.key_areas_reached: Set[int] = set()
19
15
 
20
16
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
21
- prev_map = action.get("prev_map_id", -1)
22
17
  current_map = state["map_id"]
23
- return 0.1 if current_map != prev_map else 0.0
24
-
18
+ prev_map = action.get("prev_map_id", -1)
25
19
 
26
- class BattleVictoryComponent(RewardComponent):
27
- """Reward for winning battles"""
20
+ # Key maps and rewards for progressing toward Pewter Gym
21
+ area_rewards = {
22
+ 0: 0.0, # Pallet Town (starting point)
23
+ 1: 2.0, # Route 1 - First step out of town (+2.0)
24
+ 2: 1.5, # Viridian City - Major hub (+1.5)
25
+ 3: 1.0, # Route 22 - Path to League (+1.0)
26
+ 4: 1.0, # Route 2 - To Viridian Forest (+1.0)
27
+ 5: 2.0, # Viridian Forest - Dense area (+2.0)
28
+ 6: 1.5, # Pewter City - Target city (+1.5)
29
+ 7: 5.0, # Pewter Gym - GOAL AREA (+5.0 for entering gym)
30
+ }
31
+
32
+ if current_map in area_rewards and current_map not in self.key_areas_reached:
33
+ if prev_map != current_map: # Only reward when actually entering new area
34
+ self.key_areas_reached.add(current_map)
35
+ return area_rewards[current_map]
28
36
 
29
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
30
- prev_in_battle = action.get("prev_in_battle", False)
31
- current_in_battle = state["in_battle"]
32
- battle_outcome = state["battle_outcome"]
33
-
34
- # Transitioning from battle to not in battle with victory
35
- if prev_in_battle and not current_in_battle and battle_outcome == 1:
36
- return 0.5
37
37
  return 0.0
38
38
 
39
39
 
40
- class LevelUpComponent(RewardComponent):
41
- """Reward for Pokemon leveling up"""
40
+ class StrategicTrainingReward(RewardComponent):
41
+ """Rewards for building Pokemon strength strategically"""
42
+
43
+ def __init__(self):
44
+ self.level_milestones: Set[int] = set()
45
+ self.last_level = 0
42
46
 
43
47
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
48
+ current_level = state.get("party_level", 0)
44
49
  prev_level = action.get("prev_party_level", 0)
45
- current_level = state["party_level"]
46
- level_gain = max(0, current_level - prev_level)
47
- return level_gain * 0.3
48
50
 
51
+ # Reward reaching key level milestones
52
+ milestone_rewards = {
53
+ 8: 1.0, # Level 8 - Good for early battles
54
+ 12: 2.0, # Level 12 - Ready for Brock
55
+ 15: 3.0, # Level 15 - Strong Pokemon
56
+ }
49
57
 
50
- class XPGainComponent(RewardComponent):
51
- """Small reward for XP gains"""
58
+ if current_level > prev_level and current_level in milestone_rewards:
59
+ if current_level not in self.level_milestones:
60
+ self.level_milestones.add(current_level)
61
+ return milestone_rewards[current_level]
52
62
 
53
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
54
- prev_xp = action.get("prev_party_xp", 0)
55
- current_xp = state["party_xp"]
56
- xp_gain = max(0, current_xp - prev_xp)
57
- return xp_gain * 0.001 # Very small multiplier
63
+ # Small reward for any level up (0.2 points)
64
+ if current_level > prev_level:
65
+ return 0.2
58
66
 
67
+ return 0.0
59
68
 
60
- class StepPenaltyComponent(RewardComponent):
61
- """Small penalty for each step to encourage efficiency"""
62
69
 
63
- def __init__(self, penalty: float = -0.001):
64
- self.penalty = penalty
70
+ class BattleProgressionReward(RewardComponent):
71
+ """Rewards for winning battles and gaining experience"""
65
72
 
66
73
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
67
- return self.penalty
74
+ prev_in_battle = action.get("prev_in_battle", False)
75
+ current_in_battle = state.get("in_battle", False)
76
+ battle_outcome = state.get("battle_outcome", 0)
68
77
 
78
+ # Large reward for battle victory (+1.0)
79
+ if prev_in_battle and not current_in_battle and battle_outcome == 1:
80
+ return 1.0
69
81
 
70
- class MenuPenaltyComponent(RewardComponent):
71
- """Penalty for excessive menu usage"""
82
+ # Small reward for entering battle (+0.1) - shows engagement
83
+ if not prev_in_battle and current_in_battle:
84
+ return 0.1
72
85
 
73
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
74
- # This would need more sophisticated menu tracking
75
86
  return 0.0
76
87
 
77
88
 
78
- # ===== NEW EARLY GAME PALLET TOWN REWARDS =====
79
-
80
-
81
- class ExitHouseReward(RewardComponent):
82
- """High reward for first time leaving the starting house - +2.0 points"""
89
+ class GymPreparationReward(RewardComponent):
90
+ """Rewards for preparing to challenge Brock"""
83
91
 
84
92
  def __init__(self):
85
- self.house_exited = False
93
+ self.prepared_for_gym = False
86
94
 
87
95
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
88
- if self.house_exited:
96
+ if self.prepared_for_gym:
89
97
  return 0.0
90
98
 
91
- prev_map = action.get("prev_map_id", -1)
92
- current_map = state["map_id"]
99
+ # Check if in Pewter City area and have decent Pokemon
100
+ if state["map_id"] in [6, 7]: # Pewter City or Gym
101
+ party_level = state.get("party_level", 0)
102
+ party_count = len(state.get("party", []))
103
+
104
+ # Reward being prepared for gym battle
105
+ if party_level >= 10 and party_count >= 1:
106
+ self.prepared_for_gym = True
107
+ return 3.0 # Significant reward for being gym-ready
93
108
 
94
- # Exit from house to town (assuming house maps are 1,2 and town is 0)
95
- if prev_map in [1, 2] and current_map == 0:
96
- self.house_exited = True
97
- return 2.0
98
109
  return 0.0
99
110
 
100
111
 
101
- class NPCInteractionReward(RewardComponent):
102
- """Reward for talking to NPCs - +0.8 points per unique NPC"""
112
+ class ItemCollectionReward(RewardComponent):
113
+ """Rewards for collecting useful items"""
103
114
 
104
115
  def __init__(self):
105
- self.npcs_talked_to: Set[tuple] = set()
116
+ self.items_collected: Set[int] = set()
106
117
 
107
118
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
108
- # Detect NPC conversations
109
- if state["text_box_active"] and not action.get("prev_text_box_active", False):
110
- # Use position as NPC identifier
111
- npc_key = (state["player_x"], state["player_y"], state["map_id"])
112
- if npc_key not in self.npcs_talked_to:
113
- self.npcs_talked_to.add(npc_key)
114
- return 0.8
115
- return 0.0
119
+ prev_inventory = action.get("prev_inventory", [])
120
+ current_inventory = state.get("inventory", [])
116
121
 
122
+ # Check for new items
123
+ prev_item_ids = {item["item_id"] for item in prev_inventory}
124
+ current_item_ids = {item["item_id"] for item in current_inventory}
117
125
 
118
- class OakLabDiscoveryReward(RewardComponent):
119
- """High reward for finding and entering Oak's lab - +2.5 points"""
126
+ new_items = current_item_ids - prev_item_ids
120
127
 
121
- def __init__(self):
122
- self.lab_discovered = False
128
+ # Reward valuable items for gym preparation
129
+ valuable_items = {1, 2, 3, 4, 5, 10, 11, 12, 13} # Potions, Balls, etc.
130
+ reward = 0.0
131
+
132
+ for item_id in new_items:
133
+ if item_id not in self.items_collected:
134
+ self.items_collected.add(item_id)
135
+ if item_id in valuable_items:
136
+ reward += 0.5 # +0.5 per valuable item
137
+ else:
138
+ reward += 0.1 # +0.1 per other item
139
+
140
+ return reward
141
+
142
+
143
+ class HealingManagementReward(RewardComponent):
144
+ """Rewards for keeping Pokemon healthy"""
123
145
 
124
146
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
125
- if self.lab_discovered:
147
+ prev_party = action.get("prev_party", [])
148
+ current_party = state.get("party", [])
149
+
150
+ if not prev_party or not current_party:
126
151
  return 0.0
127
152
 
128
- prev_map = action.get("prev_map_id", -1)
129
- current_map = state["map_id"]
153
+ # Reward healing Pokemon back to full health
154
+ prev_hp_pct = sum(p.get("hp_percentage", 0) for p in prev_party) / len(prev_party)
155
+ current_hp_pct = sum(p.get("hp_percentage", 0) for p in current_party) / len(current_party)
156
+
157
+ # Significant improvement in health
158
+ if current_hp_pct > prev_hp_pct + 20: # Healed at least 20% overall
159
+ return 0.8
160
+
161
+ # Small reward for maintaining good health
162
+ if current_hp_pct >= 80 and prev_hp_pct >= 80:
163
+ return 0.05
130
164
 
131
- # Entering Oak's lab (assuming map 3)
132
- if prev_map == 0 and current_map == 3:
133
- self.lab_discovered = True
134
- return 2.5
135
165
  return 0.0
136
166
 
137
167
 
138
- class StarterPokemonReward(RewardComponent):
139
- """Very high reward for getting first Pokemon - +10.0 points"""
168
+ class EfficientExplorationReward(RewardComponent):
169
+ """Rewards for exploring efficiently without getting lost"""
140
170
 
141
171
  def __init__(self):
142
- self.starter_obtained = False
172
+ self.positions_visited: Set[tuple] = set()
143
173
 
144
174
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
145
- if self.starter_obtained:
146
- return 0.0
175
+ # Track unique positions visited in each map
176
+ position_key = (state["map_id"], state["player_x"], state["player_y"])
147
177
 
148
- # Detect getting first Pokemon
149
- prev_party_count = len(action.get("prev_party", []))
150
- current_party_count = len(state.get("party", []))
178
+ if position_key not in self.positions_visited:
179
+ self.positions_visited.add(position_key)
180
+ return 0.02 # Small reward for discovering new areas
151
181
 
152
- if prev_party_count == 0 and current_party_count == 1:
153
- if state["map_id"] == 3: # In Oak's lab
154
- self.starter_obtained = True
155
- return 10.0
156
182
  return 0.0
157
183
 
158
184
 
159
- class FirstBattleReward(RewardComponent):
160
- """High reward for engaging in first battle - +5.0 points"""
161
-
162
- def __init__(self):
163
- self.first_battle = False
185
+ class BadgeVictoryReward(RewardComponent):
186
+ """HUGE reward for achieving the main goal - Boulder Badge"""
164
187
 
165
188
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
166
- if self.first_battle:
167
- return 0.0
189
+ prev_badges = action.get("prev_badges", 0)
190
+ current_badges = state.get("badges", 0)
168
191
 
169
- prev_in_battle = action.get("prev_in_battle", False)
170
- current_in_battle = state["in_battle"]
192
+ # Check if Boulder Badge (bit 0) was newly earned
193
+ boulder_badge_mask = 0x01
194
+ prev_has_badge = prev_badges & boulder_badge_mask
195
+ current_has_badge = current_badges & boulder_badge_mask
196
+
197
+ if not prev_has_badge and current_has_badge:
198
+ return 50.0 # MASSIVE reward for completing the main objective
171
199
 
172
- if not prev_in_battle and current_in_battle:
173
- self.first_battle = True
174
- return 5.0
175
200
  return 0.0
176
201
 
177
202
 
178
- class DirectionExplorationReward(RewardComponent):
179
- """Reward for trying all movement directions - +1.0 points when complete"""
203
+ class StepPenaltyComponent(RewardComponent):
204
+ """Small penalty for each step to encourage efficiency"""
180
205
 
181
- def __init__(self):
182
- self.directions_tried: Set[str] = set()
183
- self.reward_given = False
206
+ def __init__(self, penalty: float = 0.0): # Changed from -0.005 to 0.0
207
+ self.penalty = penalty
184
208
 
185
209
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
186
- if self.reward_given:
187
- return 0.0
210
+ return self.penalty
188
211
 
189
- # Track movement directions based on position changes
190
- prev_x = action.get("prev_player_x", state["player_x"])
191
- prev_y = action.get("prev_player_y", state["player_y"])
192
- current_x = state["player_x"]
193
- current_y = state["player_y"]
194
-
195
- if current_x > prev_x:
196
- self.directions_tried.add("RIGHT")
197
- elif current_x < prev_x:
198
- self.directions_tried.add("LEFT")
199
- elif current_y > prev_y:
200
- self.directions_tried.add("DOWN")
201
- elif current_y < prev_y:
202
- self.directions_tried.add("UP")
203
-
204
- if len(self.directions_tried) >= 4:
205
- self.reward_given = True
206
- return 1.0
207
- return 0.0
208
212
 
213
+ # ===== LEGACY COMPONENTS (kept for compatibility) =====
209
214
 
210
- class BuildingExplorationReward(RewardComponent):
211
- """Reward for entering different buildings - +0.5 points per building"""
212
215
 
213
- def __init__(self):
214
- self.buildings_entered: Set[int] = set()
216
+ class BadgeRewardComponent(RewardComponent):
217
+ """Legacy badge reward - now handled by BadgeVictoryReward"""
215
218
 
216
219
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
217
- prev_map = action.get("prev_map_id", -1)
218
- current_map = state["map_id"]
220
+ return 0.0 # Handled by BadgeVictoryReward
219
221
 
220
- # Entering a new building from town
221
- if (
222
- prev_map == 0 and current_map > 0 and current_map not in [1, 2]
223
- ): # From town to new building
224
- if current_map not in self.buildings_entered:
225
- self.buildings_entered.add(current_map)
226
- return 0.5
227
- return 0.0
228
222
 
223
+ class MapTransitionComponent(RewardComponent):
224
+ """Legacy map transition - now handled by RouteExplorationReward"""
225
+
226
+ async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
227
+ return 0.0 # Handled by RouteExplorationReward
229
228
 
230
- class ObjectInteractionReward(RewardComponent):
231
- """Reward for pressing A on various objects - +0.3 points per object"""
232
229
 
233
- def __init__(self):
234
- self.objects_interacted: Set[tuple] = set()
230
+ class BattleVictoryComponent(RewardComponent):
231
+ """Legacy battle victory - now handled by BattleProgressionReward"""
235
232
 
236
233
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
237
- # Detect A button interactions that trigger text
238
- if state["text_box_active"] and not action.get("prev_text_box_active", False):
239
- object_key = (state["player_x"], state["player_y"], state["map_id"])
240
- if object_key not in self.objects_interacted:
241
- self.objects_interacted.add(object_key)
242
- return 0.3
243
- return 0.0
244
-
234
+ return 0.0 # Handled by BattleProgressionReward
245
235
 
246
- class TownExplorationReward(RewardComponent):
247
- """Reward for thorough town exploration - +0.1 per new position"""
248
236
 
249
- def __init__(self):
250
- self.positions_visited: Set[tuple] = set()
237
+ class LevelUpComponent(RewardComponent):
238
+ """Legacy level up - now handled by StrategicTrainingReward"""
251
239
 
252
240
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
253
- if state["map_id"] == 0: # In Pallet Town
254
- position_key = (state["player_x"], state["player_y"])
255
- if position_key not in self.positions_visited:
256
- self.positions_visited.add(position_key)
257
- return 0.1
258
- return 0.0
259
-
241
+ return 0.0 # Handled by StrategicTrainingReward
260
242
 
261
- class RouteAttemptReward(RewardComponent):
262
- """Reward for trying to leave town (triggers story) - +3.0 points"""
263
243
 
264
- def __init__(self):
265
- self.route_attempted = False
244
+ class XPGainComponent(RewardComponent):
245
+ """Legacy XP gain - now handled by StrategicTrainingReward"""
266
246
 
267
247
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
268
- if self.route_attempted:
269
- return 0.0
270
-
271
- # Detect reaching the edge of Pallet Town (attempting to go north)
272
- if state["map_id"] == 0: # In Pallet Town
273
- if state["player_y"] <= 1: # At northern edge
274
- self.route_attempted = True
275
- return 3.0
276
- return 0.0
248
+ return 0.0 # Handled by StrategicTrainingReward