synth-ai 0.2.4.dev3__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/tracing_v3/examples/basic_usage.py +188 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +105 -6
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,330 @@
|
|
1
|
+
"""
|
2
|
+
Exploration & Discovery Reward Components
|
3
|
+
|
4
|
+
Rewards for map exploration, item discovery, and world interaction.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any, Set
|
9
|
+
|
10
|
+
|
11
|
+
class NewAreaDiscoveryReward(RewardComponent):
    """One-time bonus (+10) the first time each map id appears in the state."""

    def __init__(self):
        # Map ids this instance has already paid a bonus for.
        self.discovered_areas: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 on the first sighting of ``state["map_id"]``, else 0.0.

        ``action`` is accepted for interface compatibility and is not read.
        """
        map_id = state["map_id"]
        if map_id in self.discovered_areas:
            return 0.0
        # First visit: remember the map so the bonus is never paid twice.
        self.discovered_areas.add(map_id)
        return 10.0
|
23
|
+
|
24
|
+
|
25
|
+
class AreaCompletionReward(RewardComponent):
    """Reward for visiting enough distinct tiles in an area - +5 points.

    The bonus is paid once per map, on the step where the number of distinct
    ``(player_x, player_y)`` positions recorded for that map reaches the
    map's entry in ``area_tile_counts``. Maps without an entry are still
    tracked but can never complete.
    """

    def __init__(self):
        # map id -> distinct (x, y) positions recorded on that map
        self.area_tiles: Dict[int, Set[tuple]] = {}
        # map ids whose completion bonus has already been paid
        self.completed_areas: Set[int] = set()
        # These would be loaded from game data in a real implementation
        self.area_tile_counts = {
            0: 25,  # Pallet Town
            1: 15,  # House interior
            3: 20,  # Oak's Lab
            # Add more as needed
        }

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record the current tile and return 5.0 when the area completes.

        Reads ``map_id``, ``player_x`` and ``player_y`` from ``state``;
        ``action`` is not read. Returns 0.0 on every other step.
        """
        current_map = state["map_id"]
        if current_map in self.completed_areas:
            return 0.0

        # Track tiles visited in this area
        visited = self.area_tiles.setdefault(current_map, set())
        visited.add((state["player_x"], state["player_y"]))

        required_tiles = self.area_tile_counts.get(current_map)
        if required_tiles is None:
            # No tile count is known for this map. Previously a magic
            # fallback of 999 was used, which would pay the bonus on any
            # large unlisted map once 999 tiles had been visited.
            return 0.0

        if len(visited) >= required_tiles:
            self.completed_areas.add(current_map)
            return 5.0
        return 0.0
|
57
|
+
|
58
|
+
|
59
|
+
class RouteCompletionReward(RewardComponent):
    """Reward associated with routes - +15 points, paid once per route map.

    NOTE: real completion criteria are not implemented. As written, the
    bonus is paid the first time a map id of 10 or greater is seen. The
    per-route flags stored in ``route_progress`` are created but never
    updated by this class.
    """

    def __init__(self):
        # map ids that have already paid the route bonus
        self.completed_routes: Set[int] = set()
        # map id -> exploration flags (placeholders until game data is wired in)
        self.route_progress: Dict[int, Dict[str, bool]] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time a route map is seen, else 0.0.

        Reads ``state["map_id"]``; ``action`` is not read.
        """
        current_map = state["map_id"]

        # Check if this is a route (routes typically have IDs in a certain range)
        if current_map < 10 or current_map in self.completed_routes:
            return 0.0

        # Track route exploration elements
        self.route_progress.setdefault(
            current_map,
            {
                "all_grass_visited": False,
                "all_trainers_fought": False,
                "all_items_found": False,
            },
        )

        # This is simplified - would need actual game data.
        # The guard above already established the route is not completed, so
        # the former second membership test (and its unreachable fall-through
        # ``return 0.0``) has been removed.
        self.completed_routes.add(current_map)
        return 15.0
|
87
|
+
|
88
|
+
|
89
|
+
class BuildingEntryReward(RewardComponent):
    """One-time bonus (+3) for each building map the player steps into."""

    def __init__(self):
        # Building map ids that have already paid out.
        self.buildings_entered: Set[int] = set()
        # Building map IDs (would be loaded from game data)
        self.building_maps = {3, 4, 5, 6, 7, 8, 9, 10}  # Example building IDs

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 when a not-yet-rewarded building map is entered.

        The current map comes from ``state["map_id"]`` and the previous one
        from ``action["prev_map_id"]`` (``-1`` when absent). The bonus needs
        the map to be a listed building, not yet rewarded, and different
        from the previous map.
        """
        map_id = state["map_id"]
        came_from = action.get("prev_map_id", -1)

        if map_id not in self.building_maps:
            return 0.0
        if map_id in self.buildings_entered:
            return 0.0
        if came_from == map_id:
            # Still standing in the same map: not an entry.
            return 0.0

        self.buildings_entered.add(map_id)
        return 3.0
|
110
|
+
|
111
|
+
|
112
|
+
class HiddenAreaDiscoveryReward(RewardComponent):
    """One-time bonus (+20) for each listed secret map the player reaches."""

    def __init__(self):
        # Secret map ids that have already paid out.
        self.hidden_areas_found: Set[int] = set()
        # Hidden area map IDs (would be loaded from game data)
        self.hidden_areas = {50, 51, 52}  # Example hidden area IDs

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 the first time ``state["map_id"]`` is a hidden area.

        ``action`` is not read.
        """
        map_id = state["map_id"]

        if map_id not in self.hidden_areas:
            return 0.0
        if map_id in self.hidden_areas_found:
            return 0.0

        self.hidden_areas_found.add(map_id)
        return 20.0
|
127
|
+
|
128
|
+
|
129
|
+
class HiddenItemFoundReward(RewardComponent):
    """One-time bonus (+5) for picking up an item at a known hidden spot."""

    def __init__(self):
        # (player_x, player_y, map_id) spots that have already paid out.
        self.hidden_items_found: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when the inventory grows at an unclaimed hidden spot.

        An acquisition is inferred from ``state["inventory"]`` being longer
        than ``action["prev_inventory"]`` (both default to empty lists).
        """
        had = len(action.get("prev_inventory", []))
        has = len(state.get("inventory", []))

        if has <= had:
            # Nothing was picked up on this step.
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.hidden_items_found:
            return 0.0

        # Check if this is a hidden item location (would use game data).
        # For now, use heuristic based on position.
        if not self._is_hidden_item_location(spot):
            return 0.0

        self.hidden_items_found.add(spot)
        return 5.0

    def _is_hidden_item_location(self, location: tuple) -> bool:
        """Tell whether ``location`` is in the hard-coded hidden-item table.

        ``location`` is compared as the same ``(player_x, player_y, map_id)``
        triple that ``score`` builds.
        """
        # This would be loaded from game data
        return location in {
            (3, 5, 0),  # Example hidden item in Pallet Town
            (7, 2, 1),  # Example hidden item in route
        }
|
159
|
+
|
160
|
+
|
161
|
+
class FirstItemOfTypeReward(RewardComponent):
    """Bonus (+10) the first time an item of each known category is gained."""

    def __init__(self):
        # Category names that have already paid out.
        self.item_types_found: Set[str] = set()
        # Item type mappings (would be loaded from game data)
        self.item_types = {
            1: "pokeball",
            2: "pokeball",
            3: "pokeball",
            10: "potion",
            11: "potion",
            12: "potion",
            20: "tm",
            21: "tm",
            22: "tm",
            # Add more mappings
        }

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 per newly seen item category among newly gained ids.

        New ids are those in ``state["inventory"]`` that are absent from
        ``action["prev_inventory"]``, compared by each entry's ``"item_id"``.
        Ids without a mapping in ``item_types`` are ignored.
        """
        held_before = action.get("prev_inventory", [])
        held_now = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in held_before}
        ids_now = {entry.get("item_id") for entry in held_now}

        bonus = 0.0
        for gained_id in ids_now - ids_before:
            category = self.item_types.get(gained_id, "unknown")
            if category == "unknown" or category in self.item_types_found:
                continue
            self.item_types_found.add(category)
            bonus += 10.0

        return bonus
|
197
|
+
|
198
|
+
|
199
|
+
class RareItemDiscoveryReward(RewardComponent):
    """Bonus (+25) the first time each listed rare item id is gained."""

    def __init__(self):
        # Rare item ids that have already paid out.
        self.rare_items_found: Set[int] = set()
        # Rare item IDs (would be loaded from game data)
        self.rare_items = {1, 50, 100}  # Master Ball, rare TMs, etc.

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 for every rare id gained this step and not yet rewarded.

        New ids are those in ``state["inventory"]`` that are absent from
        ``action["prev_inventory"]``, compared by each entry's ``"item_id"``.
        """
        held_before = action.get("prev_inventory", [])
        held_now = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in held_before}
        ids_now = {entry.get("item_id") for entry in held_now}

        # Rare ids gained this step that have not yet paid out.
        fresh = [
            gained_id
            for gained_id in ids_now - ids_before
            if gained_id in self.rare_items and gained_id not in self.rare_items_found
        ]
        self.rare_items_found.update(fresh)
        return 25.0 * len(fresh)
|
223
|
+
|
224
|
+
|
225
|
+
class KeyItemAcquisitionReward(RewardComponent):
    """Bonus (+30) the first time each listed story-critical item is gained."""

    def __init__(self):
        # Key item ids that have already paid out.
        self.key_items_obtained: Set[int] = set()
        # Key item IDs (would be loaded from game data)
        self.key_items = {200, 201, 202}  # Pokedex, Town Map, etc.

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 for every key id gained this step and not yet rewarded.

        New ids are those in ``state["inventory"]`` that are absent from
        ``action["prev_inventory"]``, compared by each entry's ``"item_id"``.
        """
        held_before = action.get("prev_inventory", [])
        held_now = state.get("inventory", [])

        ids_before = {entry.get("item_id") for entry in held_before}
        ids_now = {entry.get("item_id") for entry in held_now}

        bonus = 0.0
        for gained_id in ids_now - ids_before:
            if gained_id not in self.key_items:
                continue
            if gained_id in self.key_items_obtained:
                continue
            self.key_items_obtained.add(gained_id)
            bonus += 30.0

        return bonus
|
249
|
+
|
250
|
+
|
251
|
+
class FirstWarpUsageReward(RewardComponent):
    """Reward for using doors, cave entrances, etc. for first time - +5 points.

    The bonus is paid once per warp *type* (as classified by
    ``_get_warp_type``), not once per individual warp.
    """

    def __init__(self):
        # Warp type labels that have already paid out.
        self.warp_types_used: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 on the first map transition of each warp type.

        Reads ``state["map_id"]`` and ``action["prev_map_id"]``. When the
        previous map is unknown (key missing, ``None``, or negative) no
        transition can be established and 0.0 is returned.
        """
        prev_map = action.get("prev_map_id", -1)
        current_map = state["map_id"]

        # The -1 default used to be classified as if it were a real map id:
        # on map 0 that gave abs(-1 - 0) == 1 and a spurious
        # "route_transition" bonus. Treat an unknown previous map as no warp.
        if prev_map is None or prev_map < 0:
            return 0.0

        if prev_map != current_map:
            # Determine warp type based on map transition
            warp_type = self._get_warp_type(prev_map, current_map)
            if warp_type and warp_type not in self.warp_types_used:
                self.warp_types_used.add(warp_type)
                return 5.0
        return 0.0

    def _get_warp_type(self, prev_map: int, current_map: int) -> str:
        """Classify a map transition; returns "" when no rule matches.

        Rules are checked in order, so a 0 -> 1 transition is a "door" and
        not a "route_transition".
        """
        # This would use game data to classify warps
        if prev_map == 0 and current_map > 0:
            return "door"
        elif prev_map > 10 and current_map > 10:
            return "cave"
        elif abs(prev_map - current_map) == 1:
            return "route_transition"
        return ""
|
279
|
+
|
280
|
+
|
281
|
+
class PCUsageReward(RewardComponent):
    """One-time bonus (+10) for what is assumed to be the first PC use."""

    def __init__(self):
        # Flips to True once the bonus has been paid.
        self.pc_used = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time a menu opens inside a Pokemon Center.

        Detect PC usage (would need menu state tracking). Simplified: the
        bonus is paid when ``state["map_id"]`` is one of the listed centers,
        ``state["menu_state"]`` is positive and ``action["prev_menu_state"]``
        is falsy or absent. This is a placeholder for actual PC detection.
        """
        if self.pc_used:
            return 0.0

        in_center = state["map_id"] in [4, 8, 12]  # Pokemon Centers
        menu_just_opened = (
            in_center
            and state.get("menu_state", 0) > 0
            and not action.get("prev_menu_state", 0)
        )
        if not menu_just_opened:
            return 0.0

        self.pc_used = True
        return 10.0
|
299
|
+
|
300
|
+
|
301
|
+
class VendingMachineReward(RewardComponent):
    """One-time bonus (+5) per vending machine spot where a purchase occurs."""

    def __init__(self):
        # (player_x, player_y, map_id) machine spots that have already paid out.
        self.vending_machines_used: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 for the first purchase seen at each listed machine spot.

        A purchase is inferred when ``state["money"]`` is below
        ``action["prev_money"]`` while ``state["inventory"]`` is longer than
        ``action["prev_inventory"]``; missing values default to 0 / empty.
        """
        money_before = action.get("prev_money", 0)
        money_now = state.get("money", 0)
        items_before = len(action.get("prev_inventory", []))
        items_now = len(state.get("inventory", []))

        # Money decreased and items increased = purchase
        bought_something = money_now < money_before and items_now > items_before
        if not bought_something:
            return 0.0

        # Check if at vending machine location
        spot = (state["player_x"], state["player_y"], state["map_id"])
        if not self._is_vending_machine_location(spot):
            return 0.0
        if spot in self.vending_machines_used:
            return 0.0

        self.vending_machines_used.add(spot)
        return 5.0

    def _is_vending_machine_location(self, location: tuple) -> bool:
        """Tell whether ``location`` is in the hard-coded machine table.

        ``location`` is compared as the same ``(player_x, player_y, map_id)``
        triple that ``score`` builds.
        """
        # This would be loaded from game data
        return location in {
            (5, 3, 15),  # Example vending machine location
        }
|
@@ -0,0 +1,120 @@
|
|
1
|
+
"""
|
2
|
+
Novelty & Exploration Bonus Reward Components
|
3
|
+
|
4
|
+
Rewards for first-time experiences and curiosity.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any, Set
|
9
|
+
|
10
|
+
|
11
|
+
class FirstBattleReward(RewardComponent):
    """One-time bonus (+20) when the player enters a battle for the first time."""

    def __init__(self):
        # Flips to True once the bonus has been paid.
        self.first_battle = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 on the first not-in-battle -> in-battle transition.

        Compares ``action["prev_in_battle"]`` (``False`` when absent) with
        ``state["in_battle"]``.
        """
        if self.first_battle:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        now_fighting = state["in_battle"]

        if was_fighting or not now_fighting:
            # Either the battle was already running or none has started.
            return 0.0

        self.first_battle = True
        return 20.0
|
28
|
+
|
29
|
+
|
30
|
+
class FirstPokemonCenterVisitReward(RewardComponent):
    """Reward for first healing - +15 points.

    Healing is approximated: the bonus is paid once, the first time the
    player is on a Pokemon Center map while some party member is at full HP.
    """

    def __init__(self):
        # Flips to True once the bonus has been paid.
        self.first_heal = False
        self.pokemon_center_maps = {4, 8, 12, 16}  # Pokemon Center map IDs

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time a full-HP party member is seen in a center.

        Reads ``state["map_id"]`` and ``state["party"]`` (empty when absent);
        each party entry is read via ``"hp_current"`` and ``"hp_max"``.
        ``action`` is not read.
        """
        if self.first_heal:
            return 0.0

        if state["map_id"] not in self.pokemon_center_maps:
            return 0.0

        # Check for HP restoration (simplified)
        for pokemon in state.get("party", []):
            hp_max = pokemon.get("hp_max", 0)
            # Require a real, non-zero max HP. Otherwise an entry with
            # missing or zeroed HP data satisfies 0 == 0 and is wrongly
            # counted as healed.
            if hp_max and pokemon.get("hp_current", 0) == hp_max:
                self.first_heal = True
                return 15.0
        return 0.0
|
49
|
+
|
50
|
+
|
51
|
+
class FirstPokemartPurchaseReward(RewardComponent):
    """One-time bonus (+10) the first time the player's money goes down."""

    def __init__(self):
        # Flips to True once the bonus has been paid.
        self.first_purchase = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 on the first step where money decreased.

        Compares ``action["prev_money"]`` with ``state["money"]`` (both
        default to 0). The previous amount must also be positive. Any
        decrease counts; the map is not checked.
        """
        if self.first_purchase:
            return 0.0

        money_before = action.get("prev_money", 0)
        money_now = state.get("money", 0)

        if not (money_now < money_before and money_before > 0):
            return 0.0

        self.first_purchase = True
        return 10.0
|
68
|
+
|
69
|
+
|
70
|
+
class FirstSaveReward(RewardComponent):
    """One-time bonus (+5) the first time the state reports a saved game."""

    def __init__(self):
        # Flips to True once the bonus has been paid.
        self.first_save = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time ``state["game_saved"]`` is truthy.

        This would need save detection upstream to populate that key; it is
        treated as ``False`` when absent. ``action`` is not read.
        """
        if self.first_save:
            return 0.0

        if not state.get("game_saved", False):
            return 0.0

        self.first_save = True
        return 5.0
|
85
|
+
|
86
|
+
|
87
|
+
class MenuExplorationReward(RewardComponent):
    """Placeholder for a menu-exploration bonus (intended value: +3 points).

    Not implemented yet: ``score`` always returns 0.0.
    """

    def __init__(self):
        # Reserved for menu names once tracking exists; never written here.
        self.menus_explored: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; neither argument is read."""
        # Placeholder implementation: this would need menu state tracking.
        return 0.0
|
97
|
+
|
98
|
+
|
99
|
+
class ButtonDiscoveryReward(RewardComponent):
    """Placeholder for a START/SELECT discovery bonus (intended value: +5 points).

    Not implemented yet: ``score`` always returns 0.0.
    """

    def __init__(self):
        # Reserved for button names once tracking exists; never written here.
        self.buttons_discovered: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; neither argument is read."""
        # Placeholder implementation: this would need button usage tracking.
        return 0.0
|
109
|
+
|
110
|
+
|
111
|
+
class FeatureDiscoveryReward(RewardComponent):
    """Placeholder for a PC/daycare/etc. discovery bonus (intended value: +10 points).

    Not implemented yet: ``score`` always returns 0.0.
    """

    def __init__(self):
        # Reserved for feature names once detection exists; never written here.
        self.features_discovered: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; neither argument is read."""
        # Placeholder implementation: this would need feature usage detection.
        return 0.0
|