synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +579 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. synth_ai/environments/examples/wordle/__init__.py +29 -0
  100. synth_ai/environments/examples/wordle/engine.py +391 -0
  101. synth_ai/environments/examples/wordle/environment.py +154 -0
  102. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
  103. synth_ai/environments/examples/wordle/taskset.py +222 -0
  104. synth_ai/environments/service/app.py +8 -0
  105. synth_ai/environments/service/core_routes.py +38 -0
  106. synth_ai/learning/prompts/banking77_injection_eval.py +163 -0
  107. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +201 -0
  108. synth_ai/learning/prompts/mipro.py +273 -1
  109. synth_ai/learning/prompts/random_search.py +247 -0
  110. synth_ai/learning/prompts/run_mipro_banking77.py +160 -0
  111. synth_ai/learning/prompts/run_random_search_banking77.py +305 -0
  112. synth_ai/lm/injection.py +81 -0
  113. synth_ai/lm/overrides.py +204 -0
  114. synth_ai/lm/provider_support/anthropic.py +39 -12
  115. synth_ai/lm/provider_support/openai.py +31 -4
  116. synth_ai/lm/vendors/core/anthropic_api.py +16 -0
  117. synth_ai/lm/vendors/openai_standard.py +35 -5
  118. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/METADATA +2 -1
  119. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/RECORD +123 -13
  120. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/WHEEL +0 -0
  121. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/entry_points.txt +0 -0
  122. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/licenses/LICENSE +0 -0
  123. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
1
# Pokemon Red memory addresses for state extraction
#
# Every name below is a plain integer RAM address. Nothing in this module
# reads memory; the trailing comments record the meaning the author assigned
# to each address.
BADGE_FLAGS = 0xD356  # bit-field for badges
MAP_ID = 0xD35E  # current map ID
# Note that Y is stored at the lower address (0xD361) and X at the higher one.
PLAYER_X = 0xD362  # player X coordinate
PLAYER_Y = 0xD361  # player Y coordinate
IN_BATTLE_FLAG = 0xD057  # battle state flag
BATTLE_OUTCOME = 0xD089  # 0=ongoing, 1=win, 2=lose

# Party Pokemon data (up to 6 Pokemon)
# NOTE(review): the byte extents quoted below overlap if taken literally:
# 18 bytes starting at PARTY_XP (0xD179) run past PARTY_HP_MAX (0xD188), and
# 12 bytes starting at PARTY_HP_MAX run past PARTY_LEVELS (0xD18C).
# Presumably these are fields of the first party member's record rather than
# contiguous per-party arrays - confirm against the code that reads them and
# against a Pokemon Red RAM map before relying on the stated sizes.
PARTY_COUNT = 0xD163  # number of Pokemon in party (0-6)
PARTY_SPECIES = 0xD164  # species of each Pokemon (6 bytes)
PARTY_HP_CURRENT = 0xD16C  # current HP of each Pokemon (2 bytes each, 12 bytes total)
PARTY_HP_MAX = 0xD188  # max HP of each Pokemon (2 bytes each, 12 bytes total)
PARTY_LEVELS = 0xD18C  # level of each Pokemon (6 bytes)
PARTY_XP = 0xD179  # XP of each Pokemon (3 bytes each, 18 bytes total)

# Player data
MONEY = 0xD347  # player money (3 bytes, BCD format)
PLAYER_NAME = 0xD158  # player name (up to 11 bytes)

# Inventory data
# The item list begins at the byte immediately after the count byte.
INVENTORY_COUNT = 0xD31D  # number of items in bag
INVENTORY_START = 0xD31E  # start of item/quantity pairs (up to 20 items, 2 bytes each)

# Game state flags
MENU_STATE = 0xCC26  # menu state flags
WARP_FLAG = 0xD36C  # warp/transition flags
TEXT_BOX_ACTIVE = 0xCD3D  # text box display flag
@@ -0,0 +1,275 @@
1
+ from synth_ai.environments.environment.rewards.core import RewardComponent
2
+ from typing import Dict, Any, Set
3
+
4
+
5
class BadgeRewardComponent(RewardComponent):
    """Score gym badges gained since the previous step.

    ``state["badges"]`` and ``action["prev_badges"]`` (default ``0``) are
    treated as bit-fields. Each bit that is set now but was clear before
    contributes 1.0 to the score.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        before = action.get("prev_badges", 0)
        # Mask off every bit that was already set on the previous step.
        gained_mask = state["badges"] & ~before
        return bin(gained_mask).count("1") * 1.0
14
+
15
+
16
class MapTransitionComponent(RewardComponent):
    """Score a change of map ID between consecutive steps.

    Returns 0.1 whenever ``state["map_id"]`` differs from
    ``action["prev_map_id"]`` (default ``-1``), otherwise 0.0. No record of
    previously seen maps is kept, so returning to an old map scores too.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        previous = action.get("prev_map_id", -1)
        if state["map_id"] != previous:
            return 0.1
        return 0.0
23
+
24
+
25
class BattleVictoryComponent(RewardComponent):
    """Score the step on which a battle ends in victory.

    Returns 0.5 when the previous step was in battle, the current step is
    not, and ``state["battle_outcome"]`` equals 1; otherwise 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state["battle_outcome"]

        # A win is only credited on the battle -> overworld transition.
        just_won = was_fighting and not still_fighting and outcome == 1
        return 0.5 if just_won else 0.0
37
+
38
+
39
class LevelUpComponent(RewardComponent):
    """Score increases in ``state["party_level"]``.

    The gain over ``action["prev_party_level"]`` (default ``0``) is clamped
    at zero from below and multiplied by 0.3.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        baseline = action.get("prev_party_level", 0)
        delta = state["party_level"] - baseline
        # Level decreases are never penalised.
        gained = delta if delta > 0 else 0
        return gained * 0.3
47
+
48
+
49
class XPGainComponent(RewardComponent):
    """Score increases in ``state["party_xp"]`` with a small multiplier.

    The gain over ``action["prev_party_xp"]`` (default ``0``) is clamped at
    zero from below and multiplied by 0.001.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        baseline = action.get("prev_party_xp", 0)
        delta = state["party_xp"] - baseline
        # XP decreases are never penalised.
        gained = delta if delta > 0 else 0
        return gained * 0.001  # Very small multiplier
57
+
58
+
59
class StepPenaltyComponent(RewardComponent):
    """Return a fixed value on every step, regardless of state or action.

    Args:
        penalty: Value returned by :meth:`score`; defaults to ``-0.001``.
    """

    def __init__(self, penalty: float = -0.001):
        # Stored as-is; the sign is whatever the caller supplies.
        self.penalty = penalty

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the configured per-step value."""
        return self.penalty
67
+
68
+
69
class MenuPenaltyComponent(RewardComponent):
    """Placeholder for a menu-usage penalty; currently always scores 0.0."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Not implemented yet: a real penalty needs menu tracking that is
        # more sophisticated than what is available here.
        return 0.0
75
+
76
+
77
+ # ===== NEW EARLY GAME PALLET TOWN REWARDS =====
78
+
79
+
80
class ExitHouseReward(RewardComponent):
    """One-time +2.0 for moving from a house map to the town map.

    The transition is recognised as ``prev_map_id`` in ``{1, 2}`` followed by
    ``map_id == 0``; those map IDs are assumptions made by the original
    author. The reward is granted at most once per instance.
    """

    def __init__(self):
        # Latches to True after the reward has been paid out.
        self.house_exited = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.house_exited:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state["map_id"]

        # Exit from house to town (assuming house maps are 1,2 and town is 0)
        left_house = came_from in (1, 2) and now_on == 0
        if not left_house:
            return 0.0

        self.house_exited = True
        return 2.0
98
+
99
+
100
class NPCInteractionReward(RewardComponent):
    """+0.8 the first time a text box opens at a given position.

    A "conversation" is detected as the text box becoming active on this
    step. The NPC is identified only by the player's
    ``(player_x, player_y, map_id)`` at that moment.
    """

    def __init__(self):
        # Position keys that have already been rewarded.
        self.npcs_talked_to: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Only the inactive -> active edge of the text box counts.
        if not state["text_box_active"]:
            return 0.0
        if action.get("prev_text_box_active", False):
            return 0.0

        # Use position as NPC identifier
        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.npcs_talked_to:
            return 0.0

        self.npcs_talked_to.add(spot)
        return 0.8
115
+
116
+
117
class OakLabDiscoveryReward(RewardComponent):
    """One-time +2.5 for moving from map 0 to map 3.

    Map 3 is assumed by the original author to be Oak's lab and map 0 the
    town. The reward is granted at most once per instance.
    """

    def __init__(self):
        # Latches to True after the reward has been paid out.
        self.lab_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.lab_discovered:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state["map_id"]

        # Entering Oak's lab (assuming map 3)
        entered_lab = came_from == 0 and now_on == 3
        if not entered_lab:
            return 0.0

        self.lab_discovered = True
        return 2.5
135
+
136
+
137
class StarterPokemonReward(RewardComponent):
    """One-time +10.0 when the party grows from 0 to 1 while on map 3.

    Party sizes are the lengths of ``action["prev_party"]`` and
    ``state["party"]``, each defaulting to an empty list. Map 3 is assumed
    by the original author to be Oak's lab.
    """

    def __init__(self):
        # Latches to True after the reward has been paid out.
        self.starter_obtained = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.starter_obtained:
            return 0.0

        # Detect getting first Pokemon
        size_before = len(action.get("prev_party", []))
        size_now = len(state.get("party", []))
        got_first = size_before == 0 and size_now == 1

        # The map is only consulted once the party transition has matched.
        if got_first and state["map_id"] == 3:  # In Oak's lab
            self.starter_obtained = True
            return 10.0
        return 0.0
156
+
157
+
158
class FirstBattleReward(RewardComponent):
    """One-time +5.0 on the first step that enters a battle.

    Entry is the transition from ``prev_in_battle`` falsy (default
    ``False``) to ``state["in_battle"]`` truthy.
    """

    def __init__(self):
        # Latches to True after the reward has been paid out.
        self.first_battle = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.first_battle:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        fighting_now = state["in_battle"]

        # Anything other than the not-in-battle -> in-battle edge scores 0.
        if was_fighting or not fighting_now:
            return 0.0

        self.first_battle = True
        return 5.0
175
+
176
+
177
class DirectionExplorationReward(RewardComponent):
    """One-time +1.0 once movement in all four directions has been seen.

    Direction is inferred from the change in ``player_x`` / ``player_y``
    relative to ``prev_player_x`` / ``prev_player_y``. A missing previous
    value defaults to the current one, which counts as no movement. At most
    one direction is recorded per step, with X changes taking precedence.
    """

    def __init__(self):
        # Labels ("RIGHT", "LEFT", "DOWN", "UP") observed so far.
        self.directions_tried: Set[str] = set()
        # Latches to True after the reward has been paid out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.reward_given:
            return 0.0

        # Track movement directions based on position changes
        now_x = state["player_x"]
        old_x = action.get("prev_player_x", now_x)
        now_y = state["player_y"]
        old_y = action.get("prev_player_y", now_y)

        if now_x > old_x:
            heading = "RIGHT"
        elif now_x < old_x:
            heading = "LEFT"
        elif now_y > old_y:
            heading = "DOWN"
        elif now_y < old_y:
            heading = "UP"
        else:
            heading = None  # no positional change this step

        if heading is not None:
            self.directions_tried.add(heading)

        if len(self.directions_tried) < 4:
            return 0.0

        self.reward_given = True
        return 1.0
207
+
208
+
209
class BuildingExplorationReward(RewardComponent):
    """+0.5 the first time each new building map is entered from map 0.

    A building entry is a transition from ``prev_map_id == 0`` to a positive
    ``map_id`` other than 1 or 2. Each destination map ID is rewarded once
    per instance.
    """

    def __init__(self):
        # Map IDs that have already been rewarded.
        self.buildings_entered: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        origin = action.get("prev_map_id", -1)
        destination = state["map_id"]

        # Entering a new building from town
        entered_building = (
            origin == 0 and destination > 0 and destination not in (1, 2)
        )
        if not entered_building or destination in self.buildings_entered:
            return 0.0

        self.buildings_entered.add(destination)
        return 0.5
227
+
228
+
229
class ObjectInteractionReward(RewardComponent):
    """+0.3 the first time a text box opens at a given position.

    An interaction is detected as the text box becoming active on this step.
    The object is identified only by the player's
    ``(player_x, player_y, map_id)`` at that moment.
    """

    def __init__(self):
        # Position keys that have already been rewarded.
        self.objects_interacted: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Detect A button interactions that trigger text: only the
        # inactive -> active edge of the text box counts.
        if not state["text_box_active"]:
            return 0.0
        if action.get("prev_text_box_active", False):
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.objects_interacted:
            return 0.0

        self.objects_interacted.add(spot)
        return 0.3
243
+
244
+
245
class TownExplorationReward(RewardComponent):
    """+0.1 for each previously unseen ``(player_x, player_y)`` on map 0.

    Map 0 is treated as Pallet Town by the original author. Positions on any
    other map are neither recorded nor rewarded.
    """

    def __init__(self):
        # (x, y) tiles on map 0 that have already been rewarded.
        self.positions_visited: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if state["map_id"] != 0:  # Not in Pallet Town
            return 0.0

        tile = (state["player_x"], state["player_y"])
        if tile in self.positions_visited:
            return 0.0

        self.positions_visited.add(tile)
        return 0.1
258
+
259
+
260
class RouteAttemptReward(RewardComponent):
    """One-time +3.0 for reaching ``player_y <= 1`` while on map 0.

    The original author treats this as the northern edge of Pallet Town,
    i.e. an attempt to leave town. The reward is granted at most once per
    instance.
    """

    def __init__(self):
        # Latches to True after the reward has been paid out.
        self.route_attempted = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        if self.route_attempted:
            return 0.0

        # Detect reaching the edge of Pallet Town (attempting to go north).
        # The Y coordinate is only consulted when the map check passes.
        at_north_edge = state["map_id"] == 0 and state["player_y"] <= 1
        if not at_north_edge:
            return 0.0

        self.route_attempted = True
        return 3.0
@@ -0,0 +1,142 @@
1
+ """
2
+ Pokemon Red Reward Library
3
+
4
+ Comprehensive collection of reward components organized by category.
5
+ """
6
+
7
+ from .pallet_town_rewards import *
8
+ from .exploration_rewards import *
9
+ from .social_rewards import *
10
+ from .pokemon_rewards import *
11
+ from .battle_rewards import *
12
+ from .story_rewards import *
13
+ from .economy_rewards import *
14
+ from .efficiency_rewards import *
15
+ from .novelty_rewards import *
16
+ from .adaptive_rewards import *
17
+ from .composite_rewards import *
18
+
19
# Names exported by ``from <this package> import *``.
# Every string listed here must be bound in this module's namespace by one of
# the star-imports above; a name that is listed but not defined makes a
# star-import of this package raise AttributeError.
# NOTE(review): the section headings below presumably mirror the submodule
# each class is defined in - verify against the submodules before moving
# entries between sections.
__all__ = [
    # Pallet Town Early Game
    "LeaveStartingRoomReward",
    "TalkToMomReward",
    "InteractWithTVReward",
    "CheckComputerReward",
    "HouseFullyExploredReward",
    "ExitHouseReward",
    "ExploreTownReward",
    "TalkToNPCsReward",
    "OakLabDiscoveryReward",
    "AttemptRoute1Reward",
    "OakEncounterReward",
    "FollowOakToLabReward",
    "ChooseStarterPokemonReward",
    "RivalEncounterReward",
    "FirstPokemonBattleReward",
    "MenuDiscoveryReward",
    "PokemonMenuReward",
    "BagDiscoveryReward",
    "SaveGameReward",
    "TryAllDirectionsReward",
    "DoorInteractionReward",
    "ObjectInteractionReward",
    "SignReadingReward",
    "CompleteTownExplorationReward",
    "AllNPCsTalkedToReward",
    "ReadyForAdventureReward",
    # Exploration
    "NewAreaDiscoveryReward",
    "AreaCompletionReward",
    "RouteCompletionReward",
    "BuildingEntryReward",
    "HiddenAreaDiscoveryReward",
    "HiddenItemFoundReward",
    "FirstItemOfTypeReward",
    "RareItemDiscoveryReward",
    "KeyItemAcquisitionReward",
    "FirstWarpUsageReward",
    "PCUsageReward",
    "VendingMachineReward",
    # Social & NPC
    "NewNPCConversationReward",
    "HelpfulInformationReceivedReward",
    "StoryDialogueProgressionReward",
    "ProfessorOakInteractionsReward",
    "NPCGiftReceivedReward",
    "TradeCompletionReward",
    "NameRaterUsageReward",
    # Pokemon Collection
    "FirstPokemonCaughtReward",
    "NewSpeciesCaughtReward",
    "RarePokemonCaughtReward",
    "EvolutionStonePokemonReward",
    "PokedexMilestonesReward",
    "AreaPokedexCompletionReward",
    "TypeCollectionReward",
    "PokemonEvolutionReward",
    "LevelMilestonesReward",
    "MoveLearningReward",
    "TMHMTeachingReward",
    # Battle & Combat
    "WildPokemonDefeatedReward",
    "TrainerBattleVictoryReward",
    "GymLeaderVictoryReward",
    "EliteFourMemberVictoryReward",
    "ChampionVictoryReward",
    "TypeAdvantageUsageReward",
    "CriticalHitReward",
    "StatusEffectUsageReward",
    "OHKOReward",
    "FlawlessVictoryReward",
    "UnderleveledVictoryReward",
    "BattleStreakReward",
    # Story & Achievement
    "GymBadgeEarnedReward",
    "HMAcquisitionReward",
    "EliteFourAccessReward",
    "HallOfFameEntryReward",
    "RivalBattleCompletionReward",
    "TeamRocketDefeatReward",
    "LegendaryEncounterReward",
    "SilphCoCompletionReward",
    "SafariZoneSuccessReward",
    "GameCornerPrizesReward",
    "FossilRevivalReward",
    # Economy & Resources
    "FirstEarningsReward",
    "WealthMilestonesReward",
    "SmartPurchasesReward",
    "RarePurchaseReward",
    "InventoryOrganizationReward",
    "HealingItemUsageReward",
    "PokeballEfficiencyReward",
    # Efficiency & Optimization
    "FastTravelUsageReward",
    "OptimalRoutingReward",
    "PuzzleSolvingReward",
    "MoveEffectivenessReward",
    "EvolutionTimingReward",
    "HMUsageReward",
    # Novelty & Exploration
    "FirstBattleReward",
    "FirstPokemonCenterVisitReward",
    "FirstPokemartPurchaseReward",
    "FirstSaveReward",
    "MenuExplorationReward",
    "ButtonDiscoveryReward",
    "FeatureDiscoveryReward",
    # Adaptive & Learning
    "MistakeRecoveryReward",
    "StrategyAdaptationReward",
    "ResourceConservationReward",
    "PatternRecognitionReward",
    "RouteOptimizationReward",
    "BattlePreparationReward",
    # Composite & Milestone
    "PerfectGymRunReward",
    "AreaMasteryReward",
    "SpeedrunMilestonesReward",
    "ExplorationStreakReward",
    "BattleWinStreakReward",
    "PerfectDayReward",
]
@@ -0,0 +1,56 @@
1
+ """
2
+ Adaptive & Learning Reward Components
3
+
4
+ Rewards for improvement over time and meta-learning.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any
9
+
10
+
11
class MistakeRecoveryReward(RewardComponent):
    """Placeholder reward for correcting previous errors.

    Intended value per the original design note: +10 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs mistake tracking and recovery detection before it can score.
        return 0.0
17
+
18
+
19
class StrategyAdaptationReward(RewardComponent):
    """Placeholder reward for changing tactics based on type matchups.

    Intended value per the original design note: +15 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs strategy analysis before it can score.
        return 0.0
25
+
26
+
27
class ResourceConservationReward(RewardComponent):
    """Placeholder reward for efficient PP/item usage.

    Intended value per the original design note: +8 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs resource usage tracking before it can score.
        return 0.0
33
+
34
+
35
class PatternRecognitionReward(RewardComponent):
    """Placeholder reward for recognizing and adapting to trainer patterns.

    Intended value per the original design note: +12 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs pattern analysis before it can score.
        return 0.0
41
+
42
+
43
class RouteOptimizationReward(RewardComponent):
    """Placeholder reward for finding better paths on repeat visits.

    Intended value per the original design note: +20 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs route comparison before it can score.
        return 0.0
49
+
50
+
51
class BattlePreparationReward(RewardComponent):
    """Placeholder reward for healing/preparing before major battles.

    Intended value per the original design note: +15 points. Not yet
    implemented; every call scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        # Needs preparation detection before it can score.
        return 0.0