synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
synth_ai/environments/examples/red/engine_helpers/reward_components.py CHANGED
@@ -3,274 +3,246 @@ from typing import Any, Dict, Set
 from synth_ai.environments.environment.rewards.core import RewardComponent


-class BadgeRewardComponent(RewardComponent):
-    """Reward for earning gym badges"""
+# ===== COMPREHENSIVE POKEMON RED PROGRESS REWARD SYSTEM =====
+# Designed for deterministic rewards that guide toward beating Brock at Pewter Gym

-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_badges = action.get("prev_badges", 0)
-        current_badges = state["badges"]
-        new_badges = current_badges & ~prev_badges
-        badge_count = bin(new_badges).count("1")
-        return badge_count * 1.0

+class RouteExplorationReward(RewardComponent):
+    """High rewards for reaching key areas on the path to Pewter Gym - guides exploration"""

-class MapTransitionComponent(RewardComponent):
-    """Reward for exploring new areas"""
+    def __init__(self):
+        self.key_areas_reached: Set[int] = set()

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
         current_map = state["map_id"]
-        return 0.1 if current_map != prev_map else 0.0
-
+        prev_map = action.get("prev_map_id", -1)

-class BattleVictoryComponent(RewardComponent):
-    """Reward for winning battles"""
+        # Key maps and rewards for progressing toward Pewter Gym
+        area_rewards = {
+            0: 0.0,  # Pallet Town (starting point)
+            1: 2.0,  # Route 1 - First step out of town (+2.0)
+            2: 1.5,  # Viridian City - Major hub (+1.5)
+            3: 1.0,  # Route 22 - Path to League (+1.0)
+            4: 1.0,  # Route 2 - To Viridian Forest (+1.0)
+            5: 2.0,  # Viridian Forest - Dense area (+2.0)
+            6: 1.5,  # Pewter City - Target city (+1.5)
+            7: 5.0,  # Pewter Gym - GOAL AREA (+5.0 for entering gym)
+        }
+
+        if current_map in area_rewards and current_map not in self.key_areas_reached:
+            if prev_map != current_map:  # Only reward when actually entering new area
+                self.key_areas_reached.add(current_map)
+                return area_rewards[current_map]

-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
-        battle_outcome = state["battle_outcome"]
-
-        # Transitioning from battle to not in battle with victory
-        if prev_in_battle and not current_in_battle and battle_outcome == 1:
-            return 0.5
         return 0.0


-class LevelUpComponent(RewardComponent):
-    """Reward for Pokemon leveling up"""
+class StrategicTrainingReward(RewardComponent):
+    """Rewards for building Pokemon strength strategically"""
+
+    def __init__(self):
+        self.level_milestones: Set[int] = set()
+        self.last_level = 0

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        current_level = state.get("party_level", 0)
         prev_level = action.get("prev_party_level", 0)
-        current_level = state["party_level"]
-        level_gain = max(0, current_level - prev_level)
-        return level_gain * 0.3

+        # Reward reaching key level milestones
+        milestone_rewards = {
+            8: 1.0,   # Level 8 - Good for early battles
+            12: 2.0,  # Level 12 - Ready for Brock
+            15: 3.0,  # Level 15 - Strong Pokemon
+        }

-class XPGainComponent(RewardComponent):
-    """Small reward for XP gains"""
+        if current_level > prev_level and current_level in milestone_rewards:
+            if current_level not in self.level_milestones:
+                self.level_milestones.add(current_level)
+                return milestone_rewards[current_level]

-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_xp = action.get("prev_party_xp", 0)
-        current_xp = state["party_xp"]
-        xp_gain = max(0, current_xp - prev_xp)
-        return xp_gain * 0.001  # Very small multiplier
+        # Small reward for any level up (0.2 points)
+        if current_level > prev_level:
+            return 0.2

+        return 0.0

-class StepPenaltyComponent(RewardComponent):
-    """Small penalty for each step to encourage efficiency"""

-    def __init__(self, penalty: float = -0.001):
-        self.penalty = penalty
+class BattleProgressionReward(RewardComponent):
+    """Rewards for winning battles and gaining experience"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        return self.penalty
+        prev_in_battle = action.get("prev_in_battle", False)
+        current_in_battle = state.get("in_battle", False)
+        battle_outcome = state.get("battle_outcome", 0)

+        # Large reward for battle victory (+1.0)
+        if prev_in_battle and not current_in_battle and battle_outcome == 1:
+            return 1.0

-class MenuPenaltyComponent(RewardComponent):
-    """Penalty for excessive menu usage"""
+        # Small reward for entering battle (+0.1) - shows engagement
+        if not prev_in_battle and current_in_battle:
+            return 0.1

-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # This would need more sophisticated menu tracking
         return 0.0


-# ===== NEW EARLY GAME PALLET TOWN REWARDS =====
-
-
-class ExitHouseReward(RewardComponent):
-    """High reward for first time leaving the starting house - +2.0 points"""
+class GymPreparationReward(RewardComponent):
+    """Rewards for preparing to challenge Brock"""

     def __init__(self):
-        self.house_exited = False
+        self.prepared_for_gym = False

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.house_exited:
+        if self.prepared_for_gym:
             return 0.0

-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Check if in Pewter City area and have decent Pokemon
+        if state["map_id"] in [6, 7]:  # Pewter City or Gym
+            party_level = state.get("party_level", 0)
+            party_count = len(state.get("party", []))
+
+            # Reward being prepared for gym battle
+            if party_level >= 10 and party_count >= 1:
+                self.prepared_for_gym = True
+                return 3.0  # Significant reward for being gym-ready

-        # Exit from house to town (assuming house maps are 1,2 and town is 0)
-        if prev_map in [1, 2] and current_map == 0:
-            self.house_exited = True
-            return 2.0
         return 0.0


-class NPCInteractionReward(RewardComponent):
-    """Reward for talking to NPCs - +0.8 points per unique NPC"""
+class ItemCollectionReward(RewardComponent):
+    """Rewards for collecting useful items"""

     def __init__(self):
-        self.npcs_talked_to: Set[tuple] = set()
+        self.items_collected: Set[int] = set()

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect NPC conversations
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            # Use position as NPC identifier
-            npc_key = (state["player_x"], state["player_y"], state["map_id"])
-            if npc_key not in self.npcs_talked_to:
-                self.npcs_talked_to.add(npc_key)
-                return 0.8
-        return 0.0
+        prev_inventory = action.get("prev_inventory", [])
+        current_inventory = state.get("inventory", [])

+        # Check for new items
+        prev_item_ids = {item["item_id"] for item in prev_inventory}
+        current_item_ids = {item["item_id"] for item in current_inventory}

-class OakLabDiscoveryReward(RewardComponent):
-    """High reward for finding and entering Oak's lab - +2.5 points"""
+        new_items = current_item_ids - prev_item_ids

-    def __init__(self):
-        self.lab_discovered = False
+        # Reward valuable items for gym preparation
+        valuable_items = {1, 2, 3, 4, 5, 10, 11, 12, 13}  # Potions, Balls, etc.
+        reward = 0.0
+
+        for item_id in new_items:
+            if item_id not in self.items_collected:
+                self.items_collected.add(item_id)
+                if item_id in valuable_items:
+                    reward += 0.5  # +0.5 per valuable item
+                else:
+                    reward += 0.1  # +0.1 per other item
+
+        return reward
+
+
+class HealingManagementReward(RewardComponent):
+    """Rewards for keeping Pokemon healthy"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.lab_discovered:
+        prev_party = action.get("prev_party", [])
+        current_party = state.get("party", [])
+
+        if not prev_party or not current_party:
             return 0.0

-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Reward healing Pokemon back to full health
+        prev_hp_pct = sum(p.get("hp_percentage", 0) for p in prev_party) / len(prev_party)
+        current_hp_pct = sum(p.get("hp_percentage", 0) for p in current_party) / len(current_party)
+
+        # Significant improvement in health
+        if current_hp_pct > prev_hp_pct + 20:  # Healed at least 20% overall
+            return 0.8
+
+        # Small reward for maintaining good health
+        if current_hp_pct >= 80 and prev_hp_pct >= 80:
+            return 0.05

-        # Entering Oak's lab (assuming map 3)
-        if prev_map == 0 and current_map == 3:
-            self.lab_discovered = True
-            return 2.5
         return 0.0


-class StarterPokemonReward(RewardComponent):
-    """Very high reward for getting first Pokemon - +10.0 points"""
+class EfficientExplorationReward(RewardComponent):
+    """Rewards for exploring efficiently without getting lost"""

     def __init__(self):
-        self.starter_obtained = False
+        self.positions_visited: Set[tuple] = set()

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.starter_obtained:
-            return 0.0
+        # Track unique positions visited in each map
+        position_key = (state["map_id"], state["player_x"], state["player_y"])

-        # Detect getting first Pokemon
-        prev_party_count = len(action.get("prev_party", []))
-        current_party_count = len(state.get("party", []))
+        if position_key not in self.positions_visited:
+            self.positions_visited.add(position_key)
+            return 0.02  # Small reward for discovering new areas

-        if prev_party_count == 0 and current_party_count == 1:
-            if state["map_id"] == 3:  # In Oak's lab
-                self.starter_obtained = True
-                return 10.0
         return 0.0


-class FirstBattleReward(RewardComponent):
-    """High reward for engaging in first battle - +5.0 points"""
-
-    def __init__(self):
-        self.first_battle = False
+class BadgeVictoryReward(RewardComponent):
+    """HUGE reward for achieving the main goal - Boulder Badge"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.first_battle:
-            return 0.0
+        prev_badges = action.get("prev_badges", 0)
+        current_badges = state.get("badges", 0)

-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
+        # Check if Boulder Badge (bit 0) was newly earned
+        boulder_badge_mask = 0x01
+        prev_has_badge = prev_badges & boulder_badge_mask
+        current_has_badge = current_badges & boulder_badge_mask
+
+        if not prev_has_badge and current_has_badge:
+            return 50.0  # MASSIVE reward for completing the main objective

-        if not prev_in_battle and current_in_battle:
-            self.first_battle = True
-            return 5.0
         return 0.0


-class DirectionExplorationReward(RewardComponent):
-    """Reward for trying all movement directions - +1.0 points when complete"""
+class StepPenaltyComponent(RewardComponent):
+    """Small penalty for each step to encourage efficiency"""

-    def __init__(self):
-        self.directions_tried: Set[str] = set()
-        self.reward_given = False
+    def __init__(self, penalty: float = 0.0):  # Changed from -0.005 to 0.0
+        self.penalty = penalty

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.reward_given:
-            return 0.0
+        return self.penalty

-        # Track movement directions based on position changes
-        prev_x = action.get("prev_player_x", state["player_x"])
-        prev_y = action.get("prev_player_y", state["player_y"])
-        current_x = state["player_x"]
-        current_y = state["player_y"]
-
-        if current_x > prev_x:
-            self.directions_tried.add("RIGHT")
-        elif current_x < prev_x:
-            self.directions_tried.add("LEFT")
-        elif current_y > prev_y:
-            self.directions_tried.add("DOWN")
-        elif current_y < prev_y:
-            self.directions_tried.add("UP")
-
-        if len(self.directions_tried) >= 4:
-            self.reward_given = True
-            return 1.0
-        return 0.0

+# ===== LEGACY COMPONENTS (kept for compatibility) =====

-class BuildingExplorationReward(RewardComponent):
-    """Reward for entering different buildings - +0.5 points per building"""

-    def __init__(self):
-        self.buildings_entered: Set[int] = set()
+class BadgeRewardComponent(RewardComponent):
+    """Legacy badge reward - now handled by BadgeVictoryReward"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        return 0.0  # Handled by BadgeVictoryReward

-        # Entering a new building from town
-        if (
-            prev_map == 0 and current_map > 0 and current_map not in [1, 2]
-        ):  # From town to new building
-            if current_map not in self.buildings_entered:
-                self.buildings_entered.add(current_map)
-                return 0.5
-        return 0.0

+class MapTransitionComponent(RewardComponent):
+    """Legacy map transition - now handled by RouteExplorationReward"""
+
+    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        return 0.0  # Handled by RouteExplorationReward

-class ObjectInteractionReward(RewardComponent):
-    """Reward for pressing A on various objects - +0.3 points per object"""

-    def __init__(self):
-        self.objects_interacted: Set[tuple] = set()
+class BattleVictoryComponent(RewardComponent):
+    """Legacy battle victory - now handled by BattleProgressionReward"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect A button interactions that trigger text
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            object_key = (state["player_x"], state["player_y"], state["map_id"])
-            if object_key not in self.objects_interacted:
-                self.objects_interacted.add(object_key)
-                return 0.3
-        return 0.0
-
+        return 0.0  # Handled by BattleProgressionReward

-class TownExplorationReward(RewardComponent):
-    """Reward for thorough town exploration - +0.1 per new position"""

-    def __init__(self):
-        self.positions_visited: Set[tuple] = set()
+class LevelUpComponent(RewardComponent):
+    """Legacy level up - now handled by StrategicTrainingReward"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if state["map_id"] == 0:  # In Pallet Town
-            position_key = (state["player_x"], state["player_y"])
-            if position_key not in self.positions_visited:
-                self.positions_visited.add(position_key)
-                return 0.1
-        return 0.0
-
+        return 0.0  # Handled by StrategicTrainingReward

-class RouteAttemptReward(RewardComponent):
-    """Reward for trying to leave town (triggers story) - +3.0 points"""

-    def __init__(self):
-        self.route_attempted = False
+class XPGainComponent(RewardComponent):
+    """Legacy XP gain - now handled by StrategicTrainingReward"""

     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.route_attempted:
-            return 0.0
-
-        # Detect reaching the edge of Pallet Town (attempting to go north)
-        if state["map_id"] == 0:  # In Pallet Town
-            if state["player_y"] <= 1:  # At northern edge
-                self.route_attempted = True
-                return 3.0
-        return 0.0
+        return 0.0  # Handled by StrategicTrainingReward
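
Note: every component above keeps the async score(state, action) signature from RewardComponent, so the per-step reward is just the sum of the component scores for a transition. The sketch below is a hypothetical harness, not code from the package: the COMPONENTS list, the step_reward helper, and the example state/action dicts are illustrative, and it assumes the action dict carries the prev_* fields these components read.

import asyncio

from synth_ai.environments.examples.red.engine_helpers.reward_components import (
    BadgeVictoryReward,
    BattleProgressionReward,
    RouteExplorationReward,
    StrategicTrainingReward,
)

# Hypothetical component set; the real engine wires its own list.
COMPONENTS = [
    RouteExplorationReward(),
    StrategicTrainingReward(),
    BattleProgressionReward(),
    BadgeVictoryReward(),
]

async def step_reward(state: dict, action: dict) -> float:
    # Total step reward is the sum of each component's score for this transition.
    scores = [await component.score(state, action) for component in COMPONENTS]
    return sum(scores)

# Example transition: walking from Pallet Town (map 0) onto Route 1 (map 1).
state = {"map_id": 1, "party_level": 5, "in_battle": False, "battle_outcome": 0, "badges": 0}
action = {"prev_map_id": 0, "prev_party_level": 5, "prev_in_battle": False, "prev_badges": 0}
print(asyncio.run(step_reward(state, action)))  # 2.0, all from RouteExplorationReward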
synth_ai/environments/examples/red/environment.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

 from typing import Any, Dict, List, Optional, Union
 import base64
+import time
 from io import BytesIO

 from pydantic import BaseModel, Field
@@ -19,6 +20,8 @@ from synth_ai.environments.environment.tools import (
 )
 from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
 from synth_ai.environments.stateful.core import StatefulEnvironment
+from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
+from synth_ai.tracing_v3.session_tracer import SessionTracer
 try:  # optional for image encoding
     import numpy as _np  # type: ignore
     from PIL import Image as _PILImage  # type: ignore
@@ -121,6 +124,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
         task_instance: Optional[PokemonRedTaskInstance] = None,
         custom_step_obs: Optional[GetObservationCallable] = None,
         custom_ckpt_obs: Optional[GetObservationCallable] = None,
+        tracer: Optional[SessionTracer] = None,
     ):
         self.name = "PokemonRed"
         self.task_instance = task_instance or DEFAULT_TASK_INSTANCE
@@ -129,6 +133,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
             custom_ckpt_obs or PokemonRedObservationCallable()
         )
         self.engine = PokemonRedEngine(self.task_instance)
+        self.tracer = tracer

         # Register tools
         self._press_button_tool = PressButtonTool(self.engine)
@@ -203,6 +208,27 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
         if tool_result.error and hasattr(pub_state, "error_info"):
             pub_state.error_info = tool_result.error

+        # Record EnvironmentEvent for tracing if tracer is available
+        if self.tracer and hasattr(priv_state, 'reward_last_step'):
+            # Get state information for the event
+            prev_state = getattr(self.engine, '_previous_state', None)
+            terminated = getattr(priv_state, 'terminated', False)
+            truncated = getattr(priv_state, 'truncated', False)
+
+            # Convert states to dict for serialization
+            pub_state_dict = pub_state.__dict__ if hasattr(pub_state, '__dict__') else pub_state
+
+            env_event = EnvironmentEvent(
+                system_instance_id="pokemon_red_env",
+                time_record=TimeRecord(event_time=time.time()),
+                reward=float(priv_state.reward_last_step),
+                terminated=terminated,
+                truncated=truncated,
+                system_state_before=prev_state if prev_state else None,
+                system_state_after=pub_state_dict,
+            )
+            await self.tracer.record_event(env_event)
+
         return await self._to_observation(
             priv_state, pub_state, self.custom_step_observation_callable
         )
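
Note: with this change PokemonRedEnvironment accepts an optional SessionTracer and emits one EnvironmentEvent per step that exposes priv_state.reward_last_step. A minimal wiring sketch follows; it assumes SessionTracer() can be constructed with default arguments and that record_event is called inside an active tracing session, neither of which is shown by this diff.

from synth_ai.environments.examples.red.environment import PokemonRedEnvironment
from synth_ai.tracing_v3.session_tracer import SessionTracer

tracer = SessionTracer()                    # assumed default construction
env = PokemonRedEnvironment(tracer=tracer)  # new optional keyword in this release
# Each traced step now calls tracer.record_event(EnvironmentEvent(...)),
# carrying the step reward plus terminated/truncated flags, per the hunk above.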
synth_ai/environments/examples/red/trace_hooks_v3.py ADDED
@@ -0,0 +1,168 @@
+"""
+Trace hooks for Pokemon Red environment - v3 version.
+Captures reward information and saves to Turso database.
+"""
+
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from synth_ai.tracing_v3.abstractions import BaseEvent, EnvironmentEvent
+from synth_ai.tracing_v3.hooks import HookManager
+
+# Pokemon Red achievement categories by reward value
+EXPLORATION_ACHIEVEMENTS = {
+    0.02: "explore_new_area",
+    0.04: "explore_multiple_areas",
+    1.0: "leave_starting_area",
+    1.5: "enter_new_city",
+    2.0: "explore_new_route",
+    5.0: "enter_gym_building",
+}
+
+TRAINING_ACHIEVEMENTS = {
+    0.2: "pokemon_level_up",
+    0.3: "reach_power_level",
+    3.0: "pokemon_ready_for_battle",
+}
+
+BATTLE_ACHIEVEMENTS = {
+    0.1: "encounter_wild_pokemon",
+}
+
+RESOURCE_ACHIEVEMENTS = {
+    0.05: "keep_pokemon_healthy",
+    0.5: "find_valuable_item",
+    0.8: "visit_pokemon_center",
+}
+
+MAJOR_ACHIEVEMENTS = {
+    50.0: "defeat_brock_win_badge",
+}
+
+
+async def track_pokemon_rewards(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that captures detailed Pokemon Red reward information."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+
+    reward = event_obj.reward
+    if reward is None or reward == 0.0:
+        return None
+
+    # Determine achievement type based on reward value
+    achievement_type = "unknown"
+    achievement_category = "other"
+
+    # Check each category
+    if reward in EXPLORATION_ACHIEVEMENTS:
+        achievement_type = EXPLORATION_ACHIEVEMENTS[reward]
+        achievement_category = "exploration"
+    elif reward in TRAINING_ACHIEVEMENTS:
+        achievement_type = TRAINING_ACHIEVEMENTS[reward]
+        achievement_category = "training"
+    elif reward in BATTLE_ACHIEVEMENTS:
+        achievement_type = BATTLE_ACHIEVEMENTS[reward]
+        achievement_category = "battle"
+    elif reward in RESOURCE_ACHIEVEMENTS:
+        achievement_type = RESOURCE_ACHIEVEMENTS[reward]
+        achievement_category = "resource"
+    elif reward in MAJOR_ACHIEVEMENTS:
+        achievement_type = MAJOR_ACHIEVEMENTS[reward]
+        achievement_category = "major"
+
+    return {
+        "reward_value": reward,
+        "achievement_type": achievement_type,
+        "achievement_category": achievement_category,
+        "timestamp": datetime.now().isoformat(),
+        "system_state_before": event_obj.system_state_before,
+        "system_state_after": event_obj.system_state_after,
+    }
+
+
+async def track_pokemon_milestones(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that tracks significant Pokemon Red milestones."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+
+    reward = event_obj.reward
+    if reward is None:
+        return None
+
+    # Track major milestones
+    if reward >= 1.0:  # Significant progress rewards
+        return {
+            "milestone": "major_progress",
+            "reward": reward,
+            "timestamp": datetime.now().isoformat(),
+        }
+    elif reward >= 0.5:  # Moderate rewards
+        return {
+            "milestone": "moderate_progress",
+            "reward": reward,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    return None
+
+
+async def track_pokemon_outcomes(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that tracks episode outcomes for Pokemon Red."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+
+    # Check for termination conditions
+    if event_obj.terminated or event_obj.truncated:
+        total_reward = getattr(event_obj, 'total_reward', 0.0)
+        steps_taken = getattr(event_obj, 'step_count', 0)
+
+        # Extract achievement information from system state
+        achievements_count = 0
+        if event_obj.system_state_after:
+            # Count positive rewards as achievements
+            # This is a simplified count - in practice you'd track actual achievements
+            achievements_count = max(1, int(total_reward / 0.1))  # Rough estimate
+
+        return {
+            "outcome_type": "episode_end",
+            "total_reward": total_reward,
+            "steps_taken": steps_taken,
+            "achievements_count": achievements_count,
+            "terminated": event_obj.terminated,
+            "truncated": event_obj.truncated,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    return None
+
+
+# Create the global POKEMON_RED_HOOKS instance
+POKEMON_RED_HOOKS = HookManager()
+
+# Register all hooks
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_rewards,
+    name="pokemon_rewards",
+    priority=10,
+    event_types=["environment"],
+)
+
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_milestones,
+    name="pokemon_milestones",
+    priority=5,
+    event_types=["environment"],
+)
+
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_outcomes,
+    name="pokemon_outcomes",
+    priority=5,
+    event_types=["environment"],
+)
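
Note: these hooks key achievement labels off exact reward values, so they can be exercised directly with a synthetic event. A minimal sketch, assuming EnvironmentEvent accepts the same keyword fields used in the environment.py hunk above and that system_state_before/system_state_after default to None when omitted:

import asyncio
import time

from synth_ai.environments.examples.red.trace_hooks_v3 import track_pokemon_rewards
from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord

async def main() -> None:
    # A reward of 50.0 matches MAJOR_ACHIEVEMENTS -> "defeat_brock_win_badge".
    event = EnvironmentEvent(
        system_instance_id="pokemon_red_env",
        time_record=TimeRecord(event_time=time.time()),
        reward=50.0,
        terminated=True,
        truncated=False,
    )
    print(await track_pokemon_rewards(event))

asyncio.run(main())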
synth_ai/http.py ADDED
@@ -0,0 +1,12 @@
+"""
+Backward-compatible HTTP client exports.
+
+Historically, some modules imported ``synth_ai.http``. The canonical location
+is ``synth_ai.http_client``; this module simply re-exports the same symbols so
+legacy imports keep working.
+"""
+
+
+from synth_ai.http_client import AsyncHttpClient, HTTPError, sleep
+
+__all__ = ["AsyncHttpClient", "HTTPError", "sleep"]
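
Note: because synth_ai/http.py only re-exports from synth_ai.http_client, both import paths resolve to the same objects. A quick check, assuming nothing beyond the re-export shown above:

from synth_ai.http import AsyncHttpClient as LegacyClient
from synth_ai.http_client import AsyncHttpClient as CanonicalClient

assert LegacyClient is CanonicalClient  # the legacy import path keeps working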