synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
  245. synth_ai/zyk/lms/caching/constants.py +0 -1
  246. synth_ai/zyk/lms/cost/monitor.py +0 -1
  247. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  248. synth_ai-0.2.0.dist-info/METADATA +0 -36
  249. synth_ai-0.2.0.dist-info/RECORD +0 -50
  250. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  251. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  253. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  254. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  255. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  256. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  259. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  260. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  261. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  264. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  265. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
  266. {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,558 @@
1
+ """
2
+ Pallet Town Early Game Reward Components
3
+
4
+ Rewards specifically designed for the first 50 steps of Pokemon Red,
5
+ focusing on house exploration, town discovery, and story triggers.
6
+ """
7
+
8
+ from synth_ai.environments.environment.rewards.core import RewardComponent
9
+ from typing import Dict, Any, Set
10
+
11
+
12
class LeaveStartingRoomReward(RewardComponent):
    """One-time +15 reward for going downstairs from the starting bedroom.

    The transition is detected heuristically: the map id differs from the
    previous step's map id and the player's y coordinate has decreased.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.triggered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 on the first qualifying transition, otherwise 0.0.

        ``action`` supplies the previous step's values under ``prev_map_id``
        and ``prev_player_y`` (each defaults to -1 when absent); ``state``
        must provide ``map_id`` and ``player_y``.
        """
        if not self.triggered:
            map_before = action.get("prev_map_id", -1)
            map_now = state["map_id"]
            y_before = action.get("prev_player_y", -1)
            y_now = state["player_y"]

            # Map changed and the y coordinate dropped at the same time.
            if map_before != map_now and y_before > y_now:
                self.triggered = True
                return 15.0
        return 0.0
32
+
33
+
34
class TalkToMomReward(RewardComponent):
    """One-time +10 reward for the first conversation with mom.

    Detection is a heuristic: a text box opening while the player is on map
    1 or 2 (assumed by the original author to be the house maps).
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.mom_talked_to = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time a text box opens inside the house, else 0.0.

        ``state`` must provide ``map_id`` and ``text_box_active``; ``action``
        may carry ``prev_text_box_active`` (treated as False when absent).
        """
        if self.mom_talked_to:
            return 0.0

        in_house = state["map_id"] in (1, 2)
        if not (in_house and state["text_box_active"]):
            return 0.0

        # Only a closed -> open transition counts as starting a conversation.
        if action.get("prev_text_box_active", False):
            return 0.0

        self.mom_talked_to = True
        return 10.0
51
+
52
+
53
class InteractWithTVReward(RewardComponent):
    """One-time +5 reward for checking the TV downstairs.

    Detection is positional: a text box must open on map 1 or 2 while the
    player stands on one of the tiles the original author lists as common
    TV positions.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.tv_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time a text box opens at a TV tile, else 0.0.

        ``state`` must provide ``map_id`` and ``text_box_active`` (and
        ``player_x`` / ``player_y`` once a text box has just opened);
        ``action`` may carry ``prev_text_box_active`` (False when absent).
        """
        if self.tv_checked:
            return 0.0

        inside_house = state["map_id"] in (1, 2)
        if not (inside_house and state["text_box_active"]):
            return 0.0

        # Ignore text boxes that were already open on the previous step.
        if action.get("prev_text_box_active", False):
            return 0.0

        tv_tiles = ((3, 4), (4, 4), (5, 4))
        standing_on = (state["player_x"], state["player_y"])
        if standing_on not in tv_tiles:
            return 0.0

        self.tv_checked = True
        return 5.0
77
+
78
+
79
class CheckComputerReward(RewardComponent):
    """One-time +5 reward for interacting with the PC in the bedroom.

    Detection is positional: a text box must open on map 1 (the bedroom,
    per the original author's comment) while the player is in the region
    ``player_x >= 6`` and ``player_y <= 3``.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.pc_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time a text box opens in the PC region, else 0.0.

        ``state`` must provide ``map_id`` and ``text_box_active`` (and
        ``player_x`` / ``player_y`` once a text box has just opened);
        ``action`` may carry ``prev_text_box_active`` (False when absent).
        """
        if self.pc_checked:
            return 0.0

        in_bedroom = state["map_id"] == 1
        if not (in_bedroom and state["text_box_active"]):
            return 0.0

        # Only react to the step on which the text box first appears.
        if action.get("prev_text_box_active", False):
            return 0.0

        col, row = state["player_x"], state["player_y"]
        near_pc = col >= 6 and row <= 3
        if not near_pc:
            return 0.0

        self.pc_checked = True
        return 5.0
99
+
100
+
101
class HouseFullyExploredReward(RewardComponent):
    """One-time +20 reward for checking every detectable object in the starting house.

    Interactions are recognised purely from the player's position at the
    moment a text box opens on map 1 or 2 (assumed by the original author to
    be the house maps). Only "tv", "pc" and "mom" currently have a position
    mapping.

    The previous implementation also required "bookshelf" and "poster",
    which nothing could ever record, so the reward was unreachable. The
    required set now contains only labels that ``score`` is able to detect.
    """

    def __init__(self):
        # Labels of the interactions observed so far.
        self.interactions: Set[str] = set()
        # Must stay in sync with _classify_position: every label listed here
        # needs a position mapping, otherwise the reward can never be paid.
        # TODO: add "bookshelf" and "poster" once their tiles are mapped.
        self.required_interactions = {"tv", "pc", "mom"}

    @staticmethod
    def _classify_position(player_x, player_y):
        """Return the interaction label for a player position, or None if unmapped."""
        if (player_x, player_y) in ((3, 4), (4, 4), (5, 4)):
            return "tv"
        if player_x >= 6 and player_y <= 3:
            return "pc"
        if (player_x, player_y) in ((1, 4), (2, 4)):
            return "mom"
        return None

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record a house interaction and return 20.0 when the required set completes.

        ``state`` must provide ``map_id`` and ``text_box_active`` (and
        ``player_x`` / ``player_y`` once a text box has just opened);
        ``action`` may carry ``prev_text_box_active`` (False when absent).
        Returns 0.0 on every other step, including all steps after the
        reward has been paid.
        """
        # Subset test instead of a length comparison, so unrelated labels can
        # never satisfy the requirement by count alone.
        if self.required_interactions <= self.interactions:
            return 0.0

        if not (state["map_id"] in (1, 2) and state["text_box_active"]):
            return 0.0

        # Only the step on which the text box first opens counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        label = self._classify_position(state["player_x"], state["player_y"])
        if label is None:
            return 0.0

        self.interactions.add(label)
        if self.required_interactions <= self.interactions:
            return 20.0
        return 0.0
130
+
131
+
132
class ExitHouseReward(RewardComponent):
    """One-time +20 reward for the first time the player leaves the starting house.

    A house exit is a transition from map 1 or 2 to map 0 (Pallet Town,
    per the original author's comment).
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.house_exited = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 on the first house -> town map transition, else 0.0.

        ``action`` may carry ``prev_map_id`` (-1 when absent); ``state`` must
        provide ``map_id``.
        """
        if self.house_exited:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_in = state["map_id"]

        left_house = came_from in (1, 2) and now_in == 0
        if not left_house:
            return 0.0

        self.house_exited = True
        return 20.0
150
+
151
+
152
class ExploreTownReward(RewardComponent):
    """+5 reward for each new building entered from town.

    A building entry is a transition from map 0 to any positive map id other
    than 1 or 2. Each destination map id is rewarded only once.
    """

    def __init__(self):
        # Map ids of buildings that have already been rewarded.
        self.buildings_entered: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a not-yet-seen building is entered from map 0, else 0.0.

        ``action`` may carry ``prev_map_id`` (-1 when absent); ``state`` must
        provide ``map_id``.
        """
        origin = action.get("prev_map_id", -1)
        destination = state["map_id"]

        entered_from_town = (
            origin == 0 and destination > 0 and destination not in (1, 2)
        )
        if not entered_from_town or destination in self.buildings_entered:
            return 0.0

        self.buildings_entered.add(destination)
        return 5.0
170
+
171
+
172
class TalkToNPCsReward(RewardComponent):
    """+8 reward for each unique NPC conversation in Pallet Town.

    NPCs are not identified directly; the player's ``(x, y, map_id)`` at the
    moment a text box opens on map 0 is used as a stand-in key.
    """

    def __init__(self):
        # (player_x, player_y, map_id) keys of conversations already rewarded.
        self.npcs_talked_to: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 8.0 when a text box opens at a new position on map 0, else 0.0.

        ``state`` must provide ``map_id`` and ``text_box_active`` (and
        ``player_x`` / ``player_y`` once a text box has just opened);
        ``action`` may carry ``prev_text_box_active`` (False when absent).
        """
        in_town = state["map_id"] == 0
        if not (in_town and state["text_box_active"]):
            return 0.0

        # Skip text boxes that were already open on the previous step.
        if action.get("prev_text_box_active", False):
            return 0.0

        conversation_key = (state["player_x"], state["player_y"], state["map_id"])
        if conversation_key in self.npcs_talked_to:
            return 0.0

        self.npcs_talked_to.add(conversation_key)
        return 8.0
189
+
190
+
191
class OakLabDiscoveryReward(RewardComponent):
    """One-time +25 reward for finding and entering Oak's lab.

    Entering the lab is a transition from map 0 to map 3 (Oak's lab, per
    the original author's comment).
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.lab_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 on the first map 0 -> map 3 transition, else 0.0.

        ``action`` may carry ``prev_map_id`` (-1 when absent); ``state`` must
        provide ``map_id``.
        """
        if not self.lab_discovered:
            origin = action.get("prev_map_id", -1)
            destination = state["map_id"]

            if origin == 0 and destination == 3:
                self.lab_discovered = True
                return 25.0
        return 0.0
209
+
210
+
211
class AttemptRoute1Reward(RewardComponent):
    """One-time +30 reward for trying to leave town to the north.

    The attempt is approximated by the player reaching ``player_y <= 1``
    while on map 0.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.route_attempted = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 the first time the player is at the top edge of map 0, else 0.0.

        ``state`` must provide ``map_id`` (and ``player_y`` when on map 0).
        ``action`` is not consulted.
        """
        if self.route_attempted:
            return 0.0

        at_north_edge = state["map_id"] == 0 and state["player_y"] <= 1
        if not at_north_edge:
            return 0.0

        self.route_attempted = True
        return 30.0
227
+
228
+
229
class OakEncounterReward(RewardComponent):
    """One-time +50 reward for triggering Professor Oak to stop the player.

    The encounter is approximated by a text box opening while the player is
    on map 0 with ``player_y <= 2``; dialogue content is not inspected.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.oak_encountered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 50.0 the first time a text box opens near the top of map 0, else 0.0.

        ``state`` must provide ``text_box_active`` (and ``map_id`` /
        ``player_y`` once a text box has just opened); ``action`` may carry
        ``prev_text_box_active`` (False when absent).
        """
        if self.oak_encountered:
            return 0.0

        text_just_opened = state["text_box_active"] and not action.get(
            "prev_text_box_active", False
        )
        if not text_just_opened:
            return 0.0

        # Position check stands in for "Oak would appear here".
        near_route_exit = state["map_id"] == 0 and state["player_y"] <= 2
        if not near_route_exit:
            return 0.0

        self.oak_encountered = True
        return 50.0
246
+
247
+
248
class FollowOakToLabReward(RewardComponent):
    """One-time +40 reward for returning to the lab after the Oak encounter.

    Works in two phases: first it waits for a text box to be active while the
    player is on map 0 with ``player_y <= 2`` (taken as the Oak encounter),
    then it rewards the next map 0 -> map 3 transition.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.followed_oak = False
        # Set once the encounter heuristic has fired; gates the lab check.
        self.oak_encounter_happened = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 40.0 on the first town -> lab transition after the encounter, else 0.0.

        ``state`` must provide ``map_id`` (plus ``player_y`` and
        ``text_box_active`` while the encounter is still pending);
        ``action`` may carry ``prev_map_id`` (-1 when absent).
        """
        if self.followed_oak:
            return 0.0

        # Phase 1: look for the encounter until it has been seen.
        if not self.oak_encounter_happened:
            near_route_exit = state["map_id"] == 0 and state["player_y"] <= 2
            if near_route_exit and state["text_box_active"]:
                self.oak_encounter_happened = True

        if not self.oak_encounter_happened:
            return 0.0

        # Phase 2: reward walking from town into the lab.
        origin = action.get("prev_map_id", -1)
        destination = state["map_id"]
        if origin == 0 and destination == 3:
            self.followed_oak = True
            return 40.0
        return 0.0
272
+
273
+
274
class ChooseStarterPokemonReward(RewardComponent):
    """One-time +100 reward for selecting the first Pokemon.

    Fires when the party grows from empty to exactly one member while the
    player is on map 3 (Oak's lab, per the original author's comment).
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.starter_chosen = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 when the party goes from 0 to 1 on map 3, else 0.0.

        ``action`` may carry ``prev_party`` and ``state`` may carry ``party``
        (both treated as empty when absent); ``state`` must provide
        ``map_id`` when the party-size condition is met.
        """
        if self.starter_chosen:
            return 0.0

        size_before = len(action.get("prev_party", []))
        size_now = len(state.get("party", []))

        received_first = size_before == 0 and size_now == 1
        if received_first and state["map_id"] == 3:
            self.starter_chosen = True
            return 100.0
        return 0.0
293
+
294
+
295
class RivalEncounterReward(RewardComponent):
    """One-time +30 reward for meeting the rival.

    Simplified detection: a text box opening on map 3 while the party holds
    at least one Pokemon. Dialogue content is not analysed.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.rival_met = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 the first time a text box opens on map 3 with a non-empty party.

        ``state`` must provide ``map_id`` and ``text_box_active`` and may
        carry ``party`` (empty when absent); ``action`` may carry
        ``prev_text_box_active`` (False when absent). Returns 0.0 otherwise.
        """
        if self.rival_met:
            return 0.0

        in_lab = state["map_id"] == 3
        if not (in_lab and state["text_box_active"]):
            return 0.0

        # Only the step on which the text box first opens counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        # A starter must already have been chosen.
        if len(state.get("party", [])) < 1:
            return 0.0

        self.rival_met = True
        return 30.0
315
+
316
+
317
class FirstPokemonBattleReward(RewardComponent):
    """One-time +75 reward for the first battle with the rival.

    Fires on the step where ``in_battle`` turns on while the player is on
    map 3 (Oak's lab, per the original author's comment).
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.first_battle_done = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 75.0 the first time a battle starts on map 3, else 0.0.

        ``action`` may carry ``prev_in_battle`` (False when absent);
        ``state`` must provide ``in_battle`` (and ``map_id`` when a battle
        has just started).
        """
        if self.first_battle_done:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        is_fighting = state["in_battle"]

        battle_started = not was_fighting and is_fighting
        if battle_started and state["map_id"] == 3:
            self.first_battle_done = True
            return 75.0
        return 0.0
336
+
337
+
338
class MenuDiscoveryReward(RewardComponent):
    """One-time +10 reward for opening the START menu for the first time.

    Simplified: the menu state itself is not tracked; a "START" entry in the
    action's ``buttons_pressed`` is taken as opening the menu.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.menu_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time "START" appears in ``buttons_pressed``, else 0.0.

        ``action`` may carry ``buttons_pressed`` (empty when absent).
        ``state`` is not consulted.
        """
        if self.menu_discovered:
            return 0.0

        pressed = action.get("buttons_pressed", [])
        if "START" not in pressed:
            return 0.0

        self.menu_discovered = True
        return 10.0
355
+
356
+
357
class PokemonMenuReward(RewardComponent):
    """One-time +15 reward for checking Pokemon party status.

    Placeholder implementation: no menu navigation is tracked. The reward is
    paid on the first step where the party is non-empty.
    """

    def __init__(self):
        # Latched after the reward is paid so it is granted at most once.
        self.pokemon_menu_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time ``state`` has a non-empty ``party``, else 0.0.

        ``state`` may carry ``party`` (empty when absent). ``action`` is not
        consulted.
        """
        if self.pokemon_menu_checked:
            return 0.0

        party_size = len(state.get("party", []))
        if party_size <= 0:
            return 0.0

        # Stand-in for real menu detection: owning a Pokemon is enough.
        self.pokemon_menu_checked = True
        return 15.0
374
+
375
+
376
class BagDiscoveryReward(RewardComponent):
    """One-time +10 reward nominally for opening the bag/items menu.

    Simplified: paid on the first step where ``state["inventory"]`` is
    non-empty; no menu state is examined.
    """

    def __init__(self):
        # Set once the reward has been handed out.
        self.bag_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time the inventory is non-empty, else 0.0."""
        if self.bag_discovered:
            return 0.0

        item_count = len(state.get("inventory", []))
        if item_count < 1:
            return 0.0

        self.bag_discovered = True
        return 10.0
391
+
392
+
393
class SaveGameReward(RewardComponent):
    """One-time +20 reward for the first save of the game.

    Placeholder: relies on an optional ``state["game_saved"]`` flag rather
    than detecting a save itself.
    """

    def __init__(self):
        # Set once the reward has been handed out.
        self.game_saved = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 the first time ``state["game_saved"]`` is truthy."""
        if self.game_saved:
            return 0.0

        saved_flag = state.get("game_saved", False)
        if not saved_flag:
            return 0.0

        self.game_saved = True
        return 20.0
409
+
410
+
411
class TryAllDirectionsReward(RewardComponent):
    """One-time +5 reward once UP, DOWN, LEFT and RIGHT have all been pressed."""

    def __init__(self):
        # Distinct direction buttons seen so far, accumulated across calls.
        self.directions_tried: Set[str] = set()
        # Set once the reward has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record direction presses from ``action`` and pay 5.0 once all four are seen."""
        if self.reward_given:
            return 0.0

        pressed = action.get("buttons_pressed", [])
        self.directions_tried.update(
            btn for btn in pressed if btn in ("UP", "DOWN", "LEFT", "RIGHT")
        )

        if len(self.directions_tried) < 4:
            return 0.0

        self.reward_given = True
        return 5.0
432
+
433
+
434
class DoorInteractionReward(RewardComponent):
    """+3 reward for each distinct arrival point reached when leaving map 0.

    The file's notes treat map 0 as the town and the destination as a
    building; the code only checks that the previous map was 0 and the map
    id changed.
    """

    def __init__(self):
        # (player_x, player_y, map_id) triples that have already been rewarded.
        self.doors_tried: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 for a not-yet-seen (x, y, map) reached from map 0, else 0.0.

        ``action["prev_map_id"]`` defaults to -1 when absent, so a missing
        value never counts as coming from map 0.
        """
        came_from = action.get("prev_map_id", -1)
        arrived_at = state["map_id"]

        left_town = came_from != arrived_at and came_from == 0
        if not left_town:
            return 0.0

        entrance = (state["player_x"], state["player_y"], arrived_at)
        if entrance in self.doors_tried:
            return 0.0

        self.doors_tried.add(entrance)
        return 3.0
451
+
452
+
453
class ObjectInteractionReward(RewardComponent):
    """+3 reward per distinct spot where pressing A opens a text box."""

    def __init__(self):
        # (player_x, player_y, map_id) triples that have already been rewarded.
        self.objects_interacted: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 when A newly opens a text box at an unseen spot, else 0.0."""
        pressed = action.get("buttons_pressed", [])
        if "A" not in pressed:
            return 0.0
        if not state["text_box_active"]:
            return 0.0

        # Ignore text boxes that were already open on the previous step.
        if action.get("prev_text_box_active", False):
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.objects_interacted:
            return 0.0

        self.objects_interacted.add(spot)
        return 3.0
470
+
471
+
472
class SignReadingReward(RewardComponent):
    """+5 reward per listed sign tile on map 0 where a text box newly opens.

    The file's notes label map 0 as Pallet Town and the hard-coded tiles as
    common sign positions.
    """

    def __init__(self):
        # (player_x, player_y) pairs that have already been rewarded.
        self.signs_read: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a text box opens on an unread sign tile, else 0.0."""
        reading_in_town = state["map_id"] == 0 and state["text_box_active"]
        if not reading_in_town:
            return 0.0

        # Only the step on which the text box first appears counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        sign_tiles = ((5, 8), (6, 8), (7, 8))
        here = (state["player_x"], state["player_y"])
        if here not in sign_tiles:
            return 0.0
        if here in self.signs_read:
            return 0.0

        self.signs_read.add(here)
        return 5.0
492
+
493
+
494
class CompleteTownExplorationReward(RewardComponent):
    """One-time +50 reward after enough distinct tiles on map 0 are visited.

    The threshold is 20 tiles, which the file's notes call an estimate of
    the accessible tiles in Pallet Town.
    """

    def __init__(self):
        # Distinct (player_x, player_y) positions seen while on map 0.
        self.locations_visited: Set[tuple] = set()
        # Number of distinct tiles needed before the reward is paid.
        self.required_locations = 20
        # Set once the reward has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record the current map-0 tile and pay 50.0 once the threshold is met."""
        if self.reward_given:
            return 0.0

        in_town = state["map_id"] == 0
        if not in_town:
            return 0.0

        self.locations_visited.add((state["player_x"], state["player_y"]))

        if len(self.locations_visited) >= self.required_locations:
            self.reward_given = True
            return 50.0
        return 0.0
514
+
515
+
516
class AllNPCsTalkedToReward(RewardComponent):
    """One-time +30 reward after dialogue opens at enough distinct map-0 tiles.

    Conversations are approximated by the player's position when a text box
    newly opens. The threshold is 5, which the file's notes call an estimate
    of the NPCs in Pallet Town.
    """

    def __init__(self):
        # Distinct (player_x, player_y) positions where a text box newly opened.
        self.npcs_talked_to: Set[tuple] = set()
        # Number of distinct positions needed before the reward is paid.
        self.required_npcs = 5
        # Set once the reward has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record a newly opened map-0 text box and pay 30.0 at the threshold."""
        if self.reward_given:
            return 0.0

        talking_in_town = state["map_id"] == 0 and state["text_box_active"]
        if not talking_in_town:
            return 0.0

        # Skip steps where the text box was already showing.
        if action.get("prev_text_box_active", False):
            return 0.0

        self.npcs_talked_to.add((state["player_x"], state["player_y"]))

        if len(self.npcs_talked_to) >= self.required_npcs:
            self.reward_given = True
            return 30.0
        return 0.0
539
+
540
+
541
class ReadyForAdventureReward(RewardComponent):
    """One-time +60 reward for holding a Pokemon near the top of map 0.

    "Near the top" means ``player_y <= 2`` on map 0, which the file's notes
    describe as the town exit.
    """

    def __init__(self):
        # Set once the reward has been handed out.
        self.ready_reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 60.0 the first time the party is non-empty at the exit rows."""
        if self.ready_reward_given:
            return 0.0

        owns_pokemon = len(state.get("party", [])) > 0
        near_exit = state["map_id"] == 0 and state["player_y"] <= 2

        if not (owns_pokemon and near_exit):
            return 0.0

        self.ready_reward_given = True
        return 60.0