synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +575 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
  100. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
  101. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
  102. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
  103. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
  104. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,330 @@
1
+ """
2
+ Exploration & Discovery Reward Components
3
+
4
+ Rewards for map exploration, item discovery, and world interaction.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any, Set
9
+
10
+
11
class NewAreaDiscoveryReward(RewardComponent):
    """One-time +10 bonus the first time each map ID is observed."""

    def __init__(self):
        # Map IDs that have already paid out their discovery bonus.
        self.discovered_areas: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 if ``state["map_id"]`` is new to this component, else 0.0.

        ``action`` is accepted for interface compatibility and is not read.
        """
        map_id = state["map_id"]
        if map_id in self.discovered_areas:
            return 0.0
        self.discovered_areas.add(map_id)
        return 10.0
23
+
24
+
25
class AreaCompletionReward(RewardComponent):
    """One-time +5 bonus once enough distinct tiles of a map have been visited."""

    def __init__(self):
        # Distinct (x, y) positions seen so far, keyed by map ID.
        self.area_tiles: Dict[int, Set[tuple]] = {}
        # Maps whose completion bonus has already been paid.
        self.completed_areas: Set[int] = set()
        # Placeholder thresholds; a real implementation would load these
        # from game data.
        self.area_tile_counts = {
            0: 25,  # Pallet Town
            1: 15,  # House interior
            3: 20,  # Oak's Lab
        }

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record the player's tile and return 5.0 when the map's threshold is met.

        Maps without an entry in ``area_tile_counts`` use a threshold of 999
        distinct tiles. ``action`` is not read.
        """
        map_id = state["map_id"]
        if map_id in self.completed_areas:
            return 0.0

        # Create the per-map tile set on first sight, then log this position.
        visited = self.area_tiles.setdefault(map_id, set())
        visited.add((state["player_x"], state["player_y"]))

        threshold = self.area_tile_counts.get(map_id, 999)
        if len(visited) < threshold:
            return 0.0

        self.completed_areas.add(map_id)
        return 5.0
57
+
58
+
59
class RouteCompletionReward(RewardComponent):
    """Reward for exploring a route - +15 points.

    NOTE: the per-route checklist stored in ``route_progress`` is created but
    never updated, so the bonus is currently paid on the *first visit* to each
    route map rather than on full exploration. Real completion tracking would
    need game data.
    """

    def __init__(self):
        # Route map IDs that have already paid out.
        self.completed_routes: Set[int] = set()
        # Per-route exploration checklist (placeholder; flags are never set).
        self.route_progress: Dict[int, Dict[str, bool]] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time a route map is seen, else 0.0.

        Map IDs below 10 are treated as non-routes (heuristic carried by the
        ``current_map < 10`` check). ``action`` is not read.
        """
        current_map = state["map_id"]

        # Not a route, or this route has already been rewarded.
        if current_map < 10 or current_map in self.completed_routes:
            return 0.0

        # Keep the placeholder checklist so external readers still see it.
        self.route_progress.setdefault(
            current_map,
            {
                "all_grass_visited": False,
                "all_trainers_fought": False,
                "all_items_found": False,
            },
        )

        # The guard above already established the route is not completed, so
        # no second membership test (and no fall-through return) is needed.
        self.completed_routes.add(current_map)
        return 15.0
87
+
88
+
89
class BuildingEntryReward(RewardComponent):
    """One-time +3 bonus for each building map the player walks into."""

    def __init__(self):
        # Building map IDs that have already been rewarded.
        self.buildings_entered: Set[int] = set()
        # Example building IDs; would be loaded from game data.
        self.building_maps = {3, 4, 5, 6, 7, 8, 9, 10}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 on the first transition into a building map, else 0.0.

        The previous map is read from ``action["prev_map_id"]`` (default -1).
        """
        map_id = state["map_id"]
        came_from = action.get("prev_map_id", -1)

        if map_id not in self.building_maps:
            return 0.0
        if map_id in self.buildings_entered:
            return 0.0
        if came_from == map_id:
            # No map change happened on this step.
            return 0.0

        self.buildings_entered.add(map_id)
        return 3.0
110
+
111
+
112
class HiddenAreaDiscoveryReward(RewardComponent):
    """One-time +20 bonus for each secret-area map that is reached."""

    def __init__(self):
        # Secret-area map IDs that have already paid out.
        self.hidden_areas_found: Set[int] = set()
        # Example hidden area IDs; would be loaded from game data.
        self.hidden_areas = {50, 51, 52}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 when ``state["map_id"]`` is an unrewarded hidden area.

        ``action`` is not read.
        """
        map_id = state["map_id"]
        newly_found = (
            map_id in self.hidden_areas and map_id not in self.hidden_areas_found
        )
        if not newly_found:
            return 0.0
        self.hidden_areas_found.add(map_id)
        return 20.0
127
+
128
+
129
class HiddenItemFoundReward(RewardComponent):
    """One-time +5 bonus for picking up an item at a known hidden-item spot."""

    def __init__(self):
        # (x, y, map_id) spots that have already been rewarded.
        self.hidden_items_found: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when the inventory grew at an unrewarded hidden-item spot.

        Growth is detected by comparing the length of
        ``action["prev_inventory"]`` with ``state["inventory"]`` (both default
        to an empty list).
        """
        count_before = len(action.get("prev_inventory", []))
        count_after = len(state.get("inventory", []))
        if not count_after > count_before:
            return 0.0

        # An item was picked up; identify where the player is standing.
        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.hidden_items_found:
            return 0.0
        if not self._is_hidden_item_location(spot):
            return 0.0

        self.hidden_items_found.add(spot)
        return 5.0

    def _is_hidden_item_location(self, location: tuple) -> bool:
        """Report whether ``location`` is in the hard-coded hidden-item table."""
        # Example entries only; would be loaded from game data.
        return location in {
            (3, 5, 0),  # Example hidden item in Pallet Town
            (7, 2, 1),  # Example hidden item in route
        }
159
+
160
+
161
class FirstItemOfTypeReward(RewardComponent):
    """+10 bonus the first time an item of each known category is obtained."""

    def __init__(self):
        # Categories that have already been rewarded.
        self.item_types_found: Set[str] = set()
        # Item ID -> category name (sample table; would come from game data).
        self.item_types = {
            1: "pokeball",
            2: "pokeball",
            3: "pokeball",
            10: "potion",
            11: "potion",
            12: "potion",
            20: "tm",
            21: "tm",
            22: "tm",
        }

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 for every not-yet-seen category among newly gained item IDs.

        New item IDs are those present in ``state["inventory"]`` but absent
        from ``action["prev_inventory"]``. IDs missing from ``item_types`` are
        ignored.
        """
        inventory_before = action.get("prev_inventory", [])
        inventory_after = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in inventory_before}
        ids_after = {entry.get("item_id") for entry in inventory_after}

        reward = 0.0
        for item_id in ids_after - ids_before:
            category = self.item_types.get(item_id, "unknown")
            if category in self.item_types_found or category == "unknown":
                continue
            self.item_types_found.add(category)
            reward += 10.0
        return reward
197
+
198
+
199
class RareItemDiscoveryReward(RewardComponent):
    """One-time +25 bonus per rare item ID that newly appears in the inventory."""

    def __init__(self):
        # Rare item IDs that have already been rewarded.
        self.rare_items_found: Set[int] = set()
        # Sample rare item IDs (Master Ball, rare TMs, etc.); would be loaded
        # from game data.
        self.rare_items = {1, 50, 100}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 for each unrewarded rare item ID gained this step.

        Gained IDs are those in ``state["inventory"]`` that are not in
        ``action["prev_inventory"]``.
        """
        inventory_before = action.get("prev_inventory", [])
        inventory_after = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in inventory_before}
        ids_after = {entry.get("item_id") for entry in inventory_after}

        # Newly gained IDs that are rare and have not been paid for yet.
        fresh = {
            item_id
            for item_id in ids_after - ids_before
            if item_id in self.rare_items and item_id not in self.rare_items_found
        }
        self.rare_items_found.update(fresh)
        return 25.0 * len(fresh)
223
+
224
+
225
class KeyItemAcquisitionReward(RewardComponent):
    """One-time +30 bonus per key item ID that newly appears in the inventory."""

    def __init__(self):
        # Key item IDs that have already been rewarded.
        self.key_items_obtained: Set[int] = set()
        # Sample key item IDs (Pokedex, Town Map, etc.); would be loaded from
        # game data.
        self.key_items = {200, 201, 202}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 for each unrewarded key item ID gained this step.

        Gained IDs are those in ``state["inventory"]`` that are not in
        ``action["prev_inventory"]``.
        """
        inventory_before = action.get("prev_inventory", [])
        inventory_after = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in inventory_before}
        ids_after = {entry.get("item_id") for entry in inventory_after}

        reward = 0.0
        for item_id in ids_after - ids_before:
            if item_id not in self.key_items:
                continue
            if item_id in self.key_items_obtained:
                continue
            self.key_items_obtained.add(item_id)
            reward += 30.0
        return reward
249
+
250
+
251
class FirstWarpUsageReward(RewardComponent):
    """Reward for using doors, cave entrances, etc. for first time - +5 points"""

    def __init__(self):
        # Warp categories ("door", "cave", "route_transition") already rewarded.
        self.warp_types_used: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time each warp category is used, else 0.0.

        The previous map is read from ``action["prev_map_id"]``. When it is
        absent (default -1), ``None``, or negative, no transition can be
        established and no reward is given.
        """
        prev_map = action.get("prev_map_id", -1)
        current_map = state["map_id"]

        # An unknown previous map must not be classified: the -1 sentinel
        # would otherwise satisfy abs(-1 - 0) == 1 and pay a spurious
        # "route_transition" bonus on map 0 without any real map change.
        if prev_map is None or prev_map < 0:
            return 0.0

        if prev_map == current_map:
            return 0.0

        # Classify the map change and pay once per category.
        warp_type = self._get_warp_type(prev_map, current_map)
        if warp_type and warp_type not in self.warp_types_used:
            self.warp_types_used.add(warp_type)
            return 5.0
        return 0.0

    def _get_warp_type(self, prev_map: int, current_map: int) -> str:
        """Classify a map change as "door", "cave", "route_transition" or "".

        The rules are ID-based heuristics checked in order; real
        classification would use game data. An empty string means the
        transition is not recognised.
        """
        if prev_map == 0 and current_map > 0:
            return "door"
        if prev_map > 10 and current_map > 10:
            return "cave"
        if abs(prev_map - current_map) == 1:
            return "route_transition"
        return ""
279
+
280
+
281
class PCUsageReward(RewardComponent):
    """One-time +10 bonus for the first (heuristically detected) PC use."""

    def __init__(self):
        # Set once the bonus has been paid.
        self.pc_used = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time a menu opens inside a Pokemon Center map.

        Placeholder detection: the map ID must be 4, 8 or 12,
        ``state["menu_state"]`` must be positive, and
        ``action["prev_menu_state"]`` must be falsy. Real PC detection would
        need menu state tracking.
        """
        if self.pc_used:
            return 0.0

        # Pokemon Center map IDs.
        if state["map_id"] not in (4, 8, 12):
            return 0.0
        if not (state.get("menu_state", 0) > 0):
            return 0.0
        if action.get("prev_menu_state", 0):
            # A menu was already open on the previous step.
            return 0.0

        self.pc_used = True
        return 10.0
299
+
300
+
301
class VendingMachineReward(RewardComponent):
    """One-time +5 bonus per vending machine location the player buys from."""

    def __init__(self):
        # (x, y, map_id) vending spots that have already been rewarded.
        self.vending_machines_used: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 for a first purchase made at a known vending location.

        A purchase is inferred when money went down and the inventory length
        went up between ``action["prev_*"]`` and ``state``.
        """
        money_before = action.get("prev_money", 0)
        money_after = state.get("money", 0)
        items_before = len(action.get("prev_inventory", []))
        items_after = len(state.get("inventory", []))

        purchased = money_after < money_before and items_after > items_before
        if not purchased:
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if not self._is_vending_machine_location(spot):
            return 0.0
        if spot in self.vending_machines_used:
            return 0.0

        self.vending_machines_used.add(spot)
        return 5.0

    def _is_vending_machine_location(self, location: tuple) -> bool:
        """Report whether ``location`` is in the hard-coded vending table."""
        # Example entry only; would be loaded from game data.
        return location in {
            (5, 3, 15),  # Example vending machine location
        }
@@ -0,0 +1,120 @@
1
+ """
2
+ Novelty & Exploration Bonus Reward Components
3
+
4
+ Rewards for first-time experiences and curiosity.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any, Set
9
+
10
+
11
class FirstBattleReward(RewardComponent):
    """One-time +20 bonus for the first transition into a battle."""

    def __init__(self):
        # Set once the bonus has been paid.
        self.first_battle = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 the first time a battle starts, else 0.0.

        A battle start is ``state["in_battle"]`` being truthy while
        ``action["prev_in_battle"]`` (default False) is falsy.
        """
        if self.first_battle:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        now_fighting = state["in_battle"]
        if was_fighting or not now_fighting:
            return 0.0

        self.first_battle = True
        return 20.0
28
+
29
+
30
class FirstPokemonCenterVisitReward(RewardComponent):
    """Reward for first healing - +15 points"""

    def __init__(self):
        # Set once the bonus has been paid.
        self.first_heal = False
        # Pokemon Center map IDs.
        self.pokemon_center_maps = {4, 8, 12, 16}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time a party member is at full HP in a center.

        Simplified heal detection: inside a Pokemon Center map, any entry of
        ``state["party"]`` whose ``hp_current`` equals a positive ``hp_max``
        counts as healed. ``action`` is not read.
        """
        if self.first_heal:
            return 0.0

        if state["map_id"] not in self.pokemon_center_maps:
            return 0.0

        for pokemon in state.get("party", []):
            hp_max = pokemon.get("hp_max", 0)
            # Require a real HP pool: an entry with missing or zero HP data
            # would otherwise compare 0 == 0 and count as a heal.
            if hp_max > 0 and pokemon.get("hp_current", 0) == hp_max:
                self.first_heal = True
                return 15.0
        return 0.0
49
+
50
+
51
class FirstPokemartPurchaseReward(RewardComponent):
    """One-time +10 bonus for the first step on which money decreases."""

    def __init__(self):
        # Set once the bonus has been paid.
        self.first_purchase = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time money drops from a positive amount.

        Compares ``action["prev_money"]`` with ``state["money"]`` (both
        default to 0).
        """
        if self.first_purchase:
            return 0.0

        money_before = action.get("prev_money", 0)
        money_after = state.get("money", 0)

        spent = money_after < money_before and money_before > 0
        if not spent:
            return 0.0

        self.first_purchase = True
        return 10.0
68
+
69
+
70
class FirstSaveReward(RewardComponent):
    """One-time +5 bonus for the first detected game save."""

    def __init__(self):
        # Set once the bonus has been paid.
        self.first_save = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time ``state["game_saved"]`` is truthy.

        Relies on the state exposing a ``game_saved`` flag; real save
        detection is not implemented here. ``action`` is not read.
        """
        if self.first_save or not state.get("game_saved", False):
            return 0.0
        self.first_save = True
        return 5.0
85
+
86
+
87
class MenuExplorationReward(RewardComponent):
    """Intended +3 bonus for opening different menus (not implemented yet)."""

    def __init__(self):
        # Menu names explored so far; nothing populates this yet.
        self.menus_explored: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Placeholder: always returns 0.0 until menu state tracking exists."""
        return 0.0
97
+
98
+
99
class ButtonDiscoveryReward(RewardComponent):
    """Intended +5 bonus for discovering START/SELECT uses (not implemented yet)."""

    def __init__(self):
        # Button names discovered so far; nothing populates this yet.
        self.buttons_discovered: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Placeholder: always returns 0.0 until button usage tracking exists."""
        return 0.0
109
+
110
+
111
class FeatureDiscoveryReward(RewardComponent):
    """Intended +10 bonus for discovering PC, daycare, etc. (not implemented yet)."""

    def __init__(self):
        # Feature names discovered so far; nothing populates this yet.
        self.features_discovered: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Placeholder: always returns 0.0 until feature usage detection exists."""
        return 0.0