synth-ai: synth_ai-0.2.14-py3-none-any.whl → synth_ai-0.2.17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354):
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,51 @@
1
+ """Task app registry entry for the math demo Modal deployment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextlib import suppress
6
+ from importlib import import_module
7
+
8
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
9
+
10
+ try:
11
+ from synth_ai.task.apps.math_single_step import build_config as base_build_config
12
+ except ModuleNotFoundError:
13
+ base_module = import_module("examples.rl.task_app.math_single_step")
14
+ base_build_config = base_module.build_config
15
+
16
+ DEMO_MODAL_CONFIG = ModalDeploymentConfig(
17
+ app_name="hendrycks-math-task-app",
18
+ pip_packages=(
19
+ "fastapi>=0.110.0",
20
+ "uvicorn>=0.23.0",
21
+ "pydantic>=2.6.0",
22
+ "httpx>=0.24.0",
23
+ "numpy>=1.24.0",
24
+ "aiohttp>=3.8.0",
25
+ "datasets>=2.16.0",
26
+ "synth-ai",
27
+ ),
28
+ )
29
+
30
+
31
+ def build_config():
32
+ """Reuse the shared math single-step TaskAppConfig."""
33
+
34
+ return base_build_config()
35
+
36
+
37
+ def register_demo_entry() -> None:
38
+ entry = TaskAppEntry(
39
+ app_id="hendrycks-math-demo",
40
+ description="Demo math task app (Modal-focused) shipping with synth-ai demos.",
41
+ config_factory=build_config,
42
+ modal=DEMO_MODAL_CONFIG,
43
+ )
44
+ with suppress(ValueError):
45
+ register_task_app(entry=entry)
46
+
47
+
48
+ register_demo_entry()
49
+
50
+
51
+ __all__ = ["DEMO_MODAL_CONFIG", "build_config", "register_demo_entry"]
@@ -1,4 +1,10 @@
1
- from synth_ai.core.system import System
1
+ class System:
2
+ """Minimal base data structure shared by environment types."""
3
+
4
+ id: str
5
+ name: str
6
+ description: str
7
+ pass
2
8
 
3
9
 
4
10
  class Environment(System):
@@ -4,7 +4,6 @@ from dataclasses import dataclass
4
4
  from typing import Any
5
5
 
6
6
  import numpy as np
7
-
8
7
  from synth_ai.environments.environment.shared_engine import (
9
8
  GetObservationCallable,
10
9
  InternalObservation,
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  from typing import Any
4
4
 
5
5
  from pydantic import BaseModel, Field, ValidationError
6
-
7
6
  from synth_ai.environments.environment.shared_engine import (
8
7
  GetObservationCallable,
9
8
  InternalObservation,
@@ -14,12 +14,15 @@ from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngine
14
14
  from synth_ai.environments.tasks.core import TaskInstance
15
15
 
16
16
  from .engine_helpers.reward_components import (
17
- BadgeRewardComponent,
18
- BattleVictoryComponent,
19
- LevelUpComponent,
20
- MapTransitionComponent,
17
+ RouteExplorationReward,
18
+ StrategicTrainingReward,
19
+ BattleProgressionReward,
20
+ GymPreparationReward,
21
+ ItemCollectionReward,
22
+ HealingManagementReward,
23
+ EfficientExplorationReward,
24
+ BadgeVictoryReward,
21
25
  StepPenaltyComponent,
22
- XPGainComponent,
23
26
  )
24
27
  from .engine_helpers.state_extraction import extract_game_state
25
28
 
@@ -268,15 +271,27 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
268
271
  # For testing purposes, use None emulator
269
272
  self.emulator = None
270
273
 
271
- # Initialize reward stack with dense components
274
+ # Initialize reward stack with comprehensive progress-based components
272
275
  self.reward_stack = RewardStack(
273
276
  components=[
274
- BadgeRewardComponent(),
275
- MapTransitionComponent(),
276
- BattleVictoryComponent(),
277
- LevelUpComponent(),
278
- XPGainComponent(),
279
- StepPenaltyComponent(),
277
+ # Major progress rewards
278
+ BadgeVictoryReward(), # +50.0 for Boulder Badge (main goal)
279
+ RouteExplorationReward(), # +1.0-5.0 for reaching key areas
280
+ GymPreparationReward(), # +3.0 for being gym-ready
281
+
282
+ # Training and battle rewards
283
+ StrategicTrainingReward(), # +0.2-3.0 for level ups and milestones
284
+ BattleProgressionReward(), # +0.1-1.0 for battles
285
+
286
+ # Resource management rewards
287
+ ItemCollectionReward(), # +0.1-0.5 for collecting items
288
+ HealingManagementReward(), # +0.05-0.8 for healing Pokemon
289
+
290
+ # Exploration efficiency
291
+ EfficientExplorationReward(), # +0.02 for discovering new positions
292
+
293
+ # No penalty for unproductive actions
294
+ StepPenaltyComponent(penalty=0.0), # 0.0 per step
280
295
  ]
281
296
  )
282
297
 
@@ -640,6 +655,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
640
655
  "prev_text_box_active": bool(prev_state.get("text_box_active", False)),
641
656
  "prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
642
657
  "prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
658
+ "prev_player_x": int(prev_state.get("player_x", 0)),
659
+ "prev_player_y": int(prev_state.get("player_y", 0)),
660
+ "prev_party": prev_state.get("party", []),
661
+ "prev_inventory": prev_state.get("inventory", []),
662
+ "prev_party_hp_current": int(prev_state.get("party_hp_current", 0)),
663
+ "prev_party_hp_max": int(prev_state.get("party_hp_max", 0)),
643
664
  },
644
665
  )
645
666
  except Exception as e:
@@ -3,274 +3,246 @@ from typing import Any, Dict, Set
3
3
  from synth_ai.environments.environment.rewards.core import RewardComponent
4
4
 
5
5
 
6
- class BadgeRewardComponent(RewardComponent):
7
- """Reward for earning gym badges"""
6
+ # ===== COMPREHENSIVE POKEMON RED PROGRESS REWARD SYSTEM =====
7
+ # Designed for deterministic rewards that guide toward beating Brock at Pewter Gym
8
8
 
9
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
10
- prev_badges = action.get("prev_badges", 0)
11
- current_badges = state["badges"]
12
- new_badges = current_badges & ~prev_badges
13
- badge_count = bin(new_badges).count("1")
14
- return badge_count * 1.0
15
9
 
10
+ class RouteExplorationReward(RewardComponent):
11
+ """High rewards for reaching key areas on the path to Pewter Gym - guides exploration"""
16
12
 
17
- class MapTransitionComponent(RewardComponent):
18
- """Reward for exploring new areas"""
13
+ def __init__(self):
14
+ self.key_areas_reached: Set[int] = set()
19
15
 
20
16
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
21
- prev_map = action.get("prev_map_id", -1)
22
17
  current_map = state["map_id"]
23
- return 0.1 if current_map != prev_map else 0.0
24
-
18
+ prev_map = action.get("prev_map_id", -1)
25
19
 
26
- class BattleVictoryComponent(RewardComponent):
27
- """Reward for winning battles"""
20
+ # Key maps and rewards for progressing toward Pewter Gym
21
+ area_rewards = {
22
+ 0: 0.0, # Pallet Town (starting point)
23
+ 1: 2.0, # Route 1 - First step out of town (+2.0)
24
+ 2: 1.5, # Viridian City - Major hub (+1.5)
25
+ 3: 1.0, # Route 22 - Path to League (+1.0)
26
+ 4: 1.0, # Route 2 - To Viridian Forest (+1.0)
27
+ 5: 2.0, # Viridian Forest - Dense area (+2.0)
28
+ 6: 1.5, # Pewter City - Target city (+1.5)
29
+ 7: 5.0, # Pewter Gym - GOAL AREA (+5.0 for entering gym)
30
+ }
31
+
32
+ if current_map in area_rewards and current_map not in self.key_areas_reached:
33
+ if prev_map != current_map: # Only reward when actually entering new area
34
+ self.key_areas_reached.add(current_map)
35
+ return area_rewards[current_map]
28
36
 
29
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
30
- prev_in_battle = action.get("prev_in_battle", False)
31
- current_in_battle = state["in_battle"]
32
- battle_outcome = state["battle_outcome"]
33
-
34
- # Transitioning from battle to not in battle with victory
35
- if prev_in_battle and not current_in_battle and battle_outcome == 1:
36
- return 0.5
37
37
  return 0.0
38
38
 
39
39
 
40
- class LevelUpComponent(RewardComponent):
41
- """Reward for Pokemon leveling up"""
40
+ class StrategicTrainingReward(RewardComponent):
41
+ """Rewards for building Pokemon strength strategically"""
42
+
43
+ def __init__(self):
44
+ self.level_milestones: Set[int] = set()
45
+ self.last_level = 0
42
46
 
43
47
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
48
+ current_level = state.get("party_level", 0)
44
49
  prev_level = action.get("prev_party_level", 0)
45
- current_level = state["party_level"]
46
- level_gain = max(0, current_level - prev_level)
47
- return level_gain * 0.3
48
50
 
51
+ # Reward reaching key level milestones
52
+ milestone_rewards = {
53
+ 8: 1.0, # Level 8 - Good for early battles
54
+ 12: 2.0, # Level 12 - Ready for Brock
55
+ 15: 3.0, # Level 15 - Strong Pokemon
56
+ }
49
57
 
50
- class XPGainComponent(RewardComponent):
51
- """Small reward for XP gains"""
58
+ if current_level > prev_level and current_level in milestone_rewards:
59
+ if current_level not in self.level_milestones:
60
+ self.level_milestones.add(current_level)
61
+ return milestone_rewards[current_level]
52
62
 
53
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
54
- prev_xp = action.get("prev_party_xp", 0)
55
- current_xp = state["party_xp"]
56
- xp_gain = max(0, current_xp - prev_xp)
57
- return xp_gain * 0.001 # Very small multiplier
63
+ # Small reward for any level up (0.2 points)
64
+ if current_level > prev_level:
65
+ return 0.2
58
66
 
67
+ return 0.0
59
68
 
60
- class StepPenaltyComponent(RewardComponent):
61
- """Small penalty for each step to encourage efficiency"""
62
69
 
63
- def __init__(self, penalty: float = -0.001):
64
- self.penalty = penalty
70
+ class BattleProgressionReward(RewardComponent):
71
+ """Rewards for winning battles and gaining experience"""
65
72
 
66
73
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
67
- return self.penalty
74
+ prev_in_battle = action.get("prev_in_battle", False)
75
+ current_in_battle = state.get("in_battle", False)
76
+ battle_outcome = state.get("battle_outcome", 0)
68
77
 
78
+ # Large reward for battle victory (+1.0)
79
+ if prev_in_battle and not current_in_battle and battle_outcome == 1:
80
+ return 1.0
69
81
 
70
- class MenuPenaltyComponent(RewardComponent):
71
- """Penalty for excessive menu usage"""
82
+ # Small reward for entering battle (+0.1) - shows engagement
83
+ if not prev_in_battle and current_in_battle:
84
+ return 0.1
72
85
 
73
- async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
74
- # This would need more sophisticated menu tracking
75
86
  return 0.0
76
87
 
77
88
 
78
- # ===== NEW EARLY GAME PALLET TOWN REWARDS =====
79
-
80
-
81
- class ExitHouseReward(RewardComponent):
82
- """High reward for first time leaving the starting house - +2.0 points"""
89
+ class GymPreparationReward(RewardComponent):
90
+ """Rewards for preparing to challenge Brock"""
83
91
 
84
92
  def __init__(self):
85
- self.house_exited = False
93
+ self.prepared_for_gym = False
86
94
 
87
95
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
88
- if self.house_exited:
96
+ if self.prepared_for_gym:
89
97
  return 0.0
90
98
 
91
- prev_map = action.get("prev_map_id", -1)
92
- current_map = state["map_id"]
99
+ # Check if in Pewter City area and have decent Pokemon
100
+ if state["map_id"] in [6, 7]: # Pewter City or Gym
101
+ party_level = state.get("party_level", 0)
102
+ party_count = len(state.get("party", []))
103
+
104
+ # Reward being prepared for gym battle
105
+ if party_level >= 10 and party_count >= 1:
106
+ self.prepared_for_gym = True
107
+ return 3.0 # Significant reward for being gym-ready
93
108
 
94
- # Exit from house to town (assuming house maps are 1,2 and town is 0)
95
- if prev_map in [1, 2] and current_map == 0:
96
- self.house_exited = True
97
- return 2.0
98
109
  return 0.0
99
110
 
100
111
 
101
- class NPCInteractionReward(RewardComponent):
102
- """Reward for talking to NPCs - +0.8 points per unique NPC"""
112
+ class ItemCollectionReward(RewardComponent):
113
+ """Rewards for collecting useful items"""
103
114
 
104
115
  def __init__(self):
105
- self.npcs_talked_to: Set[tuple] = set()
116
+ self.items_collected: Set[int] = set()
106
117
 
107
118
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
108
- # Detect NPC conversations
109
- if state["text_box_active"] and not action.get("prev_text_box_active", False):
110
- # Use position as NPC identifier
111
- npc_key = (state["player_x"], state["player_y"], state["map_id"])
112
- if npc_key not in self.npcs_talked_to:
113
- self.npcs_talked_to.add(npc_key)
114
- return 0.8
115
- return 0.0
119
+ prev_inventory = action.get("prev_inventory", [])
120
+ current_inventory = state.get("inventory", [])
116
121
 
122
+ # Check for new items
123
+ prev_item_ids = {item["item_id"] for item in prev_inventory}
124
+ current_item_ids = {item["item_id"] for item in current_inventory}
117
125
 
118
- class OakLabDiscoveryReward(RewardComponent):
119
- """High reward for finding and entering Oak's lab - +2.5 points"""
126
+ new_items = current_item_ids - prev_item_ids
120
127
 
121
- def __init__(self):
122
- self.lab_discovered = False
128
+ # Reward valuable items for gym preparation
129
+ valuable_items = {1, 2, 3, 4, 5, 10, 11, 12, 13} # Potions, Balls, etc.
130
+ reward = 0.0
131
+
132
+ for item_id in new_items:
133
+ if item_id not in self.items_collected:
134
+ self.items_collected.add(item_id)
135
+ if item_id in valuable_items:
136
+ reward += 0.5 # +0.5 per valuable item
137
+ else:
138
+ reward += 0.1 # +0.1 per other item
139
+
140
+ return reward
141
+
142
+
143
+ class HealingManagementReward(RewardComponent):
144
+ """Rewards for keeping Pokemon healthy"""
123
145
 
124
146
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
125
- if self.lab_discovered:
147
+ prev_party = action.get("prev_party", [])
148
+ current_party = state.get("party", [])
149
+
150
+ if not prev_party or not current_party:
126
151
  return 0.0
127
152
 
128
- prev_map = action.get("prev_map_id", -1)
129
- current_map = state["map_id"]
153
+ # Reward healing Pokemon back to full health
154
+ prev_hp_pct = sum(p.get("hp_percentage", 0) for p in prev_party) / len(prev_party)
155
+ current_hp_pct = sum(p.get("hp_percentage", 0) for p in current_party) / len(current_party)
156
+
157
+ # Significant improvement in health
158
+ if current_hp_pct > prev_hp_pct + 20: # Healed at least 20% overall
159
+ return 0.8
160
+
161
+ # Small reward for maintaining good health
162
+ if current_hp_pct >= 80 and prev_hp_pct >= 80:
163
+ return 0.05
130
164
 
131
- # Entering Oak's lab (assuming map 3)
132
- if prev_map == 0 and current_map == 3:
133
- self.lab_discovered = True
134
- return 2.5
135
165
  return 0.0
136
166
 
137
167
 
138
- class StarterPokemonReward(RewardComponent):
139
- """Very high reward for getting first Pokemon - +10.0 points"""
168
+ class EfficientExplorationReward(RewardComponent):
169
+ """Rewards for exploring efficiently without getting lost"""
140
170
 
141
171
  def __init__(self):
142
- self.starter_obtained = False
172
+ self.positions_visited: Set[tuple] = set()
143
173
 
144
174
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
145
- if self.starter_obtained:
146
- return 0.0
175
+ # Track unique positions visited in each map
176
+ position_key = (state["map_id"], state["player_x"], state["player_y"])
147
177
 
148
- # Detect getting first Pokemon
149
- prev_party_count = len(action.get("prev_party", []))
150
- current_party_count = len(state.get("party", []))
178
+ if position_key not in self.positions_visited:
179
+ self.positions_visited.add(position_key)
180
+ return 0.02 # Small reward for discovering new areas
151
181
 
152
- if prev_party_count == 0 and current_party_count == 1:
153
- if state["map_id"] == 3: # In Oak's lab
154
- self.starter_obtained = True
155
- return 10.0
156
182
  return 0.0
157
183
 
158
184
 
159
- class FirstBattleReward(RewardComponent):
160
- """High reward for engaging in first battle - +5.0 points"""
161
-
162
- def __init__(self):
163
- self.first_battle = False
185
+ class BadgeVictoryReward(RewardComponent):
186
+ """HUGE reward for achieving the main goal - Boulder Badge"""
164
187
 
165
188
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
166
- if self.first_battle:
167
- return 0.0
189
+ prev_badges = action.get("prev_badges", 0)
190
+ current_badges = state.get("badges", 0)
168
191
 
169
- prev_in_battle = action.get("prev_in_battle", False)
170
- current_in_battle = state["in_battle"]
192
+ # Check if Boulder Badge (bit 0) was newly earned
193
+ boulder_badge_mask = 0x01
194
+ prev_has_badge = prev_badges & boulder_badge_mask
195
+ current_has_badge = current_badges & boulder_badge_mask
196
+
197
+ if not prev_has_badge and current_has_badge:
198
+ return 50.0 # MASSIVE reward for completing the main objective
171
199
 
172
- if not prev_in_battle and current_in_battle:
173
- self.first_battle = True
174
- return 5.0
175
200
  return 0.0
176
201
 
177
202
 
178
- class DirectionExplorationReward(RewardComponent):
179
- """Reward for trying all movement directions - +1.0 points when complete"""
203
+ class StepPenaltyComponent(RewardComponent):
204
+ """Small penalty for each step to encourage efficiency"""
180
205
 
181
- def __init__(self):
182
- self.directions_tried: Set[str] = set()
183
- self.reward_given = False
206
+ def __init__(self, penalty: float = 0.0): # Changed from -0.005 to 0.0
207
+ self.penalty = penalty
184
208
 
185
209
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
186
- if self.reward_given:
187
- return 0.0
210
+ return self.penalty
188
211
 
189
- # Track movement directions based on position changes
190
- prev_x = action.get("prev_player_x", state["player_x"])
191
- prev_y = action.get("prev_player_y", state["player_y"])
192
- current_x = state["player_x"]
193
- current_y = state["player_y"]
194
-
195
- if current_x > prev_x:
196
- self.directions_tried.add("RIGHT")
197
- elif current_x < prev_x:
198
- self.directions_tried.add("LEFT")
199
- elif current_y > prev_y:
200
- self.directions_tried.add("DOWN")
201
- elif current_y < prev_y:
202
- self.directions_tried.add("UP")
203
-
204
- if len(self.directions_tried) >= 4:
205
- self.reward_given = True
206
- return 1.0
207
- return 0.0
208
212
 
213
+ # ===== LEGACY COMPONENTS (kept for compatibility) =====
209
214
 
210
- class BuildingExplorationReward(RewardComponent):
211
- """Reward for entering different buildings - +0.5 points per building"""
212
215
 
213
- def __init__(self):
214
- self.buildings_entered: Set[int] = set()
216
+ class BadgeRewardComponent(RewardComponent):
217
+ """Legacy badge reward - now handled by BadgeVictoryReward"""
215
218
 
216
219
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
217
- prev_map = action.get("prev_map_id", -1)
218
- current_map = state["map_id"]
220
+ return 0.0 # Handled by BadgeVictoryReward
219
221
 
220
- # Entering a new building from town
221
- if (
222
- prev_map == 0 and current_map > 0 and current_map not in [1, 2]
223
- ): # From town to new building
224
- if current_map not in self.buildings_entered:
225
- self.buildings_entered.add(current_map)
226
- return 0.5
227
- return 0.0
228
222
 
223
+ class MapTransitionComponent(RewardComponent):
224
+ """Legacy map transition - now handled by RouteExplorationReward"""
225
+
226
+ async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
227
+ return 0.0 # Handled by RouteExplorationReward
229
228
 
230
- class ObjectInteractionReward(RewardComponent):
231
- """Reward for pressing A on various objects - +0.3 points per object"""
232
229
 
233
- def __init__(self):
234
- self.objects_interacted: Set[tuple] = set()
230
+ class BattleVictoryComponent(RewardComponent):
231
+ """Legacy battle victory - now handled by BattleProgressionReward"""
235
232
 
236
233
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
237
- # Detect A button interactions that trigger text
238
- if state["text_box_active"] and not action.get("prev_text_box_active", False):
239
- object_key = (state["player_x"], state["player_y"], state["map_id"])
240
- if object_key not in self.objects_interacted:
241
- self.objects_interacted.add(object_key)
242
- return 0.3
243
- return 0.0
244
-
234
+ return 0.0 # Handled by BattleProgressionReward
245
235
 
246
- class TownExplorationReward(RewardComponent):
247
- """Reward for thorough town exploration - +0.1 per new position"""
248
236
 
249
- def __init__(self):
250
- self.positions_visited: Set[tuple] = set()
237
+ class LevelUpComponent(RewardComponent):
238
+ """Legacy level up - now handled by StrategicTrainingReward"""
251
239
 
252
240
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
253
- if state["map_id"] == 0: # In Pallet Town
254
- position_key = (state["player_x"], state["player_y"])
255
- if position_key not in self.positions_visited:
256
- self.positions_visited.add(position_key)
257
- return 0.1
258
- return 0.0
259
-
241
+ return 0.0 # Handled by StrategicTrainingReward
260
242
 
261
- class RouteAttemptReward(RewardComponent):
262
- """Reward for trying to leave town (triggers story) - +3.0 points"""
263
243
 
264
- def __init__(self):
265
- self.route_attempted = False
244
+ class XPGainComponent(RewardComponent):
245
+ """Legacy XP gain - now handled by StrategicTrainingReward"""
266
246
 
267
247
  async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
268
- if self.route_attempted:
269
- return 0.0
270
-
271
- # Detect reaching the edge of Pallet Town (attempting to go north)
272
- if state["map_id"] == 0: # In Pallet Town
273
- if state["player_y"] <= 1: # At northern edge
274
- self.route_attempted = True
275
- return 3.0
276
- return 0.0
248
+ return 0.0 # Handled by StrategicTrainingReward
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any, Dict, List, Optional, Union
4
4
  import base64
5
+ import time
5
6
  from io import BytesIO
6
7
 
7
8
  from pydantic import BaseModel, Field
@@ -19,6 +20,8 @@ from synth_ai.environments.environment.tools import (
19
20
  )
20
21
  from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
21
22
  from synth_ai.environments.stateful.core import StatefulEnvironment
23
+ from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
24
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
22
25
  try: # optional for image encoding
23
26
  import numpy as _np # type: ignore
24
27
  from PIL import Image as _PILImage # type: ignore
@@ -121,6 +124,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
121
124
  task_instance: Optional[PokemonRedTaskInstance] = None,
122
125
  custom_step_obs: Optional[GetObservationCallable] = None,
123
126
  custom_ckpt_obs: Optional[GetObservationCallable] = None,
127
+ tracer: Optional[SessionTracer] = None,
124
128
  ):
125
129
  self.name = "PokemonRed"
126
130
  self.task_instance = task_instance or DEFAULT_TASK_INSTANCE
@@ -129,6 +133,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
129
133
  custom_ckpt_obs or PokemonRedObservationCallable()
130
134
  )
131
135
  self.engine = PokemonRedEngine(self.task_instance)
136
+ self.tracer = tracer
132
137
 
133
138
  # Register tools
134
139
  self._press_button_tool = PressButtonTool(self.engine)
@@ -203,6 +208,27 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
203
208
  if tool_result.error and hasattr(pub_state, "error_info"):
204
209
  pub_state.error_info = tool_result.error
205
210
 
211
+ # Record EnvironmentEvent for tracing if tracer is available
212
+ if self.tracer and hasattr(priv_state, 'reward_last_step'):
213
+ # Get state information for the event
214
+ prev_state = getattr(self.engine, '_previous_state', None)
215
+ terminated = getattr(priv_state, 'terminated', False)
216
+ truncated = getattr(priv_state, 'truncated', False)
217
+
218
+ # Convert states to dict for serialization
219
+ pub_state_dict = pub_state.__dict__ if hasattr(pub_state, '__dict__') else pub_state
220
+
221
+ env_event = EnvironmentEvent(
222
+ system_instance_id="pokemon_red_env",
223
+ time_record=TimeRecord(event_time=time.time()),
224
+ reward=float(priv_state.reward_last_step),
225
+ terminated=terminated,
226
+ truncated=truncated,
227
+ system_state_before=prev_state if prev_state else None,
228
+ system_state_after=pub_state_dict,
229
+ )
230
+ await self.tracer.record_event(env_event)
231
+
206
232
  return await self._to_observation(
207
233
  priv_state, pub_state, self.custom_step_observation_callable
208
234
  )