synth-ai 0.2.4.dev3__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +575 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. synth_ai/tracing_v3/examples/basic_usage.py +188 -0
  100. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
  101. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +105 -6
  102. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
  103. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
  104. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
  105. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,283 @@
1
+ """
2
+ Battle & Combat Reward Components
3
+
4
+ Rewards for battle victories, combat strategy, and battle efficiency.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any, Set
9
+
10
+
11
class WildPokemonDefeatedReward(RewardComponent):
    """Award 3 points on the step where a battle ends in victory.

    The check cannot tell wild encounters from trainer battles: any battle
    exit with ``battle_outcome == 1`` qualifies.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 when a battle was just won, otherwise 0.0.

        ``state`` must contain ``"in_battle"``; ``state["battle_outcome"]``
        defaults to 0 and ``action["prev_in_battle"]`` defaults to False.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        # A win is the transition "in battle -> out of battle" with outcome 1.
        just_won = was_fighting and not still_fighting and outcome == 1
        return 3.0 if just_won else 0.0
25
+
26
+
27
class TrainerBattleVictoryReward(RewardComponent):
    """Award 15 points the first time a battle is won at a given location."""

    def __init__(self):
        # (player_x, player_y, map_id) tuples that have already paid out.
        self.trainers_defeated: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 for a first victory at the player's position, else 0.0.

        The player's position and map ID stand in for the trainer's identity.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        just_won = was_fighting and not still_fighting and outcome == 1
        if not just_won:
            return 0.0

        location = (state["player_x"], state["player_y"], state["map_id"])
        if location in self.trainers_defeated:
            return 0.0

        self.trainers_defeated.add(location)
        return 15.0
46
+
47
+
48
class GymLeaderVictoryReward(RewardComponent):
    """Award 100 points once per gym map when a battle there is won."""

    def __init__(self):
        # Hard-coded gym map IDs (placeholder; would be loaded from game data).
        self.gym_maps = {20, 21, 22, 23, 24, 25, 26, 27}
        # Gym map IDs that have already paid out.
        self.gym_leaders_defeated: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 for the first battle victory on each gym map, else 0.0."""
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        map_id = state["map_id"]

        won_in_gym = (
            was_fighting
            and not still_fighting
            and outcome == 1
            and map_id in self.gym_maps
        )
        if not won_in_gym or map_id in self.gym_leaders_defeated:
            return 0.0

        self.gym_leaders_defeated.add(map_id)
        return 100.0
73
+
74
+
75
class EliteFourMemberVictoryReward(RewardComponent):
    """Award 200 points once per Elite Four room when a battle there is won."""

    def __init__(self):
        # Hard-coded Elite Four room IDs (placeholder; would be loaded from game data).
        self.elite_four_maps = {100, 101, 102, 103}
        # Room IDs that have already paid out.
        self.elite_four_defeated: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 200.0 for the first battle victory in each Elite Four room, else 0.0."""
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        room_id = state["map_id"]

        won_in_room = (
            was_fighting
            and not still_fighting
            and outcome == 1
            and room_id in self.elite_four_maps
        )
        if not won_in_room or room_id in self.elite_four_defeated:
            return 0.0

        self.elite_four_defeated.add(room_id)
        return 200.0
100
+
101
+
102
class ChampionVictoryReward(RewardComponent):
    """Award 500 points, once, for winning a battle in the Champion room."""

    def __init__(self):
        self.champion_map = 104  # Champion room ID
        self.champion_defeated = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 500.0 on the first battle victory on ``champion_map``, else 0.0.

        Once the reward has been paid, ``state`` and ``action`` are no longer
        inspected.
        """
        if self.champion_defeated:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        room_id = state["map_id"]

        beat_champion = (
            was_fighting
            and not still_fighting
            and outcome == 1
            and room_id == self.champion_map
        )
        if not beat_champion:
            return 0.0

        self.champion_defeated = True
        return 500.0
128
+
129
+
130
class TypeAdvantageUsageReward(RewardComponent):
    """Award 5 points for each super effective move used during a battle.

    Placeholder implementation: the move and opponent types are read from
    ``action["move_used"]`` and ``action["opponent_type"]`` and checked
    against a small, simplified matchup table.
    """

    # (move_type, opponent_type) pairs treated as super effective.
    # Built once at class level instead of on every lookup.
    _SUPER_EFFECTIVE_MATCHUPS = frozenset(
        {
            ("water", "fire"),
            ("fire", "grass"),
            ("grass", "water"),
            ("electric", "water"),
            # Add more type matchups
        }
    )

    def __init__(self):
        # Number of super effective moves rewarded so far.
        self.super_effective_count = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a super effective move was used in battle, else 0.0.

        Each rewarded move also increments ``super_effective_count``.
        """
        if not state["in_battle"]:
            return 0.0

        move_used = action.get("move_used")
        opponent_type = action.get("opponent_type")

        if (
            move_used
            and opponent_type
            and self._is_super_effective(move_used, opponent_type)
        ):
            # The counter was previously initialised but never updated.
            self.super_effective_count += 1
            return 5.0
        return 0.0

    def _is_super_effective(self, move_type: str, opponent_type: str) -> bool:
        """Check if move is super effective against opponent.

        String inputs are compared case-insensitively, so ``"Water"`` matches
        the table's ``"water"``.
        """
        if isinstance(move_type, str):
            move_type = move_type.lower()
        if isinstance(opponent_type, str):
            opponent_type = opponent_type.lower()
        return (move_type, opponent_type) in self._SUPER_EFFECTIVE_MATCHUPS
161
+
162
+
163
class CriticalHitReward(RewardComponent):
    """Award 3 points when a critical hit is reported during a battle.

    Placeholder: relies on the caller supplying ``action["critical_hit"]``.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 if in battle and ``action["critical_hit"]`` is truthy, else 0.0."""
        if not state["in_battle"]:
            return 0.0

        landed_crit = action.get("critical_hit", False)
        return 3.0 if landed_crit else 0.0
174
+
175
+
176
class StatusEffectUsageReward(RewardComponent):
    """Award 5 points when a recognised status effect is applied in battle.

    Placeholder: relies on the caller supplying ``action["status_applied"]``.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 if in battle and the applied status is a recognised one, else 0.0."""
        if not state["in_battle"]:
            return 0.0

        recognised_statuses = ["paralysis", "poison", "sleep", "burn", "freeze"]
        applied = action.get("status_applied")
        return 5.0 if applied in recognised_statuses else 0.0
187
+
188
+
189
class OHKOReward(RewardComponent):
    """Award 10 points for defeating an opponent with a single hit.

    Placeholder: damage figures are read from ``action`` rather than tracked.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 if the opponent fell to damage of at least its max HP, else 0.0.

        ``opponent_max_hp`` defaults to 100 and ``damage_dealt`` to 0 when the
        caller does not supply them.
        """
        if not state["in_battle"]:
            return 0.0

        opponent_fainted = action.get("opponent_defeated", False)
        dealt = action.get("damage_dealt", 0)
        max_hp = action.get("opponent_max_hp", 100)

        if not opponent_fainted:
            return 0.0
        # One-hit KO: the damage covers the opponent's whole HP bar.
        return 10.0 if dealt >= max_hp else 0.0
204
+
205
+
206
class FlawlessVictoryReward(RewardComponent):
    """Award 20 points for winning a battle without any party member losing HP.

    HP is recorded per party slot when a battle starts and compared with the
    party's HP when the battle ends in victory.
    """

    def __init__(self):
        # Party slot index -> hp_current recorded when the current battle began.
        self.battle_start_hp: Dict[int, int] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 on a victory where no party slot lost HP, else 0.0.

        No reward is given when the start of the battle was never observed,
        because there is no baseline to compare against.
        """
        prev_in_battle = action.get("prev_in_battle", False)
        current_in_battle = state["in_battle"]
        battle_outcome = state.get("battle_outcome", 0)

        # Battle started: replace any leftover baseline with a fresh one.
        if not prev_in_battle and current_in_battle:
            self.battle_start_hp.clear()
            for slot, pokemon in enumerate(state.get("party", [])):
                self.battle_start_hp[slot] = pokemon.get("hp_current", 0)
            return 0.0

        # Only the step where a battle ends matters from here on.
        if not (prev_in_battle and not current_in_battle):
            return 0.0

        # Take the baseline and reset tracking for ANY battle end (win, loss,
        # flee) so stale HP values never leak into the next battle.
        baseline = dict(self.battle_start_hp)
        self.battle_start_hp.clear()

        if battle_outcome != 1 or not baseline:
            return 0.0

        flawless = all(
            pokemon.get("hp_current", 0) >= baseline.get(slot, 0)
            for slot, pokemon in enumerate(state.get("party", []))
        )
        return 20.0 if flawless else 0.0
243
+
244
+
245
class UnderleveledVictoryReward(RewardComponent):
    """Award 25 points for winning with a Pokemon 5 or more levels below the opponent.

    Levels are read from ``action["player_pokemon_level"]`` and
    ``action["opponent_pokemon_level"]``.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 on a victory with a level gap of at least 5, else 0.0.

        No reward is given unless both levels are supplied; a missing player
        level is not treated as level 0, which would fake a disadvantage.
        """
        prev_in_battle = action.get("prev_in_battle", False)
        current_in_battle = state["in_battle"]
        battle_outcome = state.get("battle_outcome", 0)

        if not (prev_in_battle and not current_in_battle and battle_outcome == 1):
            return 0.0

        player_level = action.get("player_pokemon_level")
        opponent_level = action.get("opponent_pokemon_level")
        if player_level is None or opponent_level is None:
            return 0.0

        # Reward if player Pokemon is 5+ levels lower
        if opponent_level - player_level >= 5:
            return 25.0
        return 0.0
262
+
263
+
264
class BattleStreakReward(RewardComponent):
    """Award a flat 5 points for every battle won, while counting the win streak.

    ``current_streak`` is maintained but does not scale the reward.
    """

    def __init__(self):
        self.current_streak = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a battle ends in victory, else 0.0.

        A battle that ends with any other outcome resets ``current_streak``.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        battle_ended = was_fighting and not still_fighting
        if not battle_ended:
            return 0.0

        if outcome == 1:  # Victory
            self.current_streak += 1
            return 5.0

        # Loss or other outcome
        self.current_streak = 0
        return 0.0
@@ -0,0 +1,149 @@
1
+ """
2
+ Composite & Milestone Reward Components
3
+
4
+ Rewards for achievement combinations and progression streaks.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any, Set
9
+
10
+
11
class PerfectGymRunReward(RewardComponent):
    """Award 200 points, once per gym, for a gym victory with no HP lost.

    The party is snapshotted on gym entry. A battle victory on the gym map
    pays out when no party member's ``hp_current`` is below its snapshot.
    """

    def __init__(self):
        # Hard-coded gym map IDs.
        self.gym_maps = {20, 21, 22, 23, 24, 25, 26, 27}
        # Gym map IDs that have already paid out.
        self.perfect_gyms: Set[int] = set()
        # Gym map ID -> copy of the party as it was on gym entry.
        self.gym_start_party_state: Dict[int, list] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 200.0 for the first perfect victory in each gym, else 0.0."""
        current_map = state["map_id"]
        if current_map not in self.gym_maps:
            return 0.0

        # Snapshot the party when the gym is entered from a non-gym map, or
        # when this gym has no snapshot yet. With prev_map_id absent the
        # existing snapshot is kept rather than overwritten every step.
        prev_map = action.get("prev_map_id")
        entered_gym = prev_map is not None and prev_map not in self.gym_maps
        if entered_gym or current_map not in self.gym_start_party_state:
            # Copy each entry so later in-place HP changes cannot alter the baseline.
            self.gym_start_party_state[current_map] = [
                dict(pokemon) for pokemon in state.get("party", [])
            ]

        prev_in_battle = action.get("prev_in_battle", False)
        current_in_battle = state["in_battle"]
        battle_outcome = state.get("battle_outcome", 0)

        won_battle = prev_in_battle and not current_in_battle and battle_outcome == 1
        if not won_battle or current_map in self.perfect_gyms:
            return 0.0

        start_party = self.gym_start_party_state.get(current_map, [])
        current_party = state.get("party", [])

        # Perfect run: every Pokemon still has at least its entry HP.
        perfect = all(
            current_pkmn.get("hp_current", 0) >= start_pkmn.get("hp_current", 0)
            for start_pkmn, current_pkmn in zip(start_party, current_party)
        )
        if perfect:
            self.perfect_gyms.add(current_map)
            return 200.0
        return 0.0
56
+
57
+
58
class AreaMasteryReward(RewardComponent):
    """Placeholder for a full-area-completion reward (intended value: +100 points).

    Area tracking is not implemented, so ``score`` always yields 0.0.
    """

    def __init__(self):
        self.mastered_areas: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
67
+
68
+
69
class SpeedrunMilestonesReward(RewardComponent):
    """Placeholder for a timed story-milestone reward (intended value: +50 points).

    Time tracking is not implemented, so ``score`` always yields 0.0.
    """

    def __init__(self):
        self.milestones_reached: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
78
+
79
+
80
class ExplorationStreakReward(RewardComponent):
    """Award 2 points per streak step for consecutive new area discoveries.

    An area counts as new only the first time it is entered, so bouncing
    between two already-visited maps no longer builds a streak.
    """

    def __init__(self):
        self.streak = 0
        self.last_area = -1
        # Every map ID that has already been rewarded as a discovery.
        self.discovered_areas: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return ``2.0 * streak`` on discovering a new area, else 0.0.

        Staying on the same map, or entering an area that was discovered
        earlier, resets the streak.
        """
        current_map = state["map_id"]
        prev_map = action.get("prev_map_id", -1)

        if current_map == prev_map:
            # Stayed in same area - reset streak
            self.streak = 0
            return 0.0

        if current_map == self.last_area:
            # The map change was already accounted for; leave the streak alone.
            return 0.0

        if current_map in self.discovered_areas:
            # Revisiting an old area is not a discovery and breaks the streak.
            self.streak = 0
            self.last_area = current_map
            return 0.0

        # New area discovered
        self.discovered_areas.add(current_map)
        self.streak += 1
        self.last_area = current_map
        return 2.0 * self.streak
101
+
102
+
103
class BattleWinStreakReward(RewardComponent):
    """Award 3 points times the current streak length for each consecutive win."""

    def __init__(self):
        self.win_streak = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return ``3.0 * win_streak`` when a battle ends in victory, else 0.0.

        A battle that ends with any other outcome resets the streak to zero.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        battle_ended = was_fighting and not still_fighting
        if not battle_ended:
            return 0.0

        if outcome == 1:  # Victory
            self.win_streak += 1
            return 3.0 * self.win_streak

        # Loss
        self.win_streak = 0
        return 0.0
122
+
123
+
124
class PerfectDayReward(RewardComponent):
    """Award 100 points, once, for reaching step 100 with no Pokemon having fainted.

    Placeholder: the "session" is approximated by ``state["step_count"] >= 100``.
    """

    def __init__(self):
        self.any_pokemon_fainted = False
        self.perfect_day_achieved = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 the first time the session length is reached faint-free, else 0.0."""
        if self.perfect_day_achieved:
            return 0.0

        # Remember for good if any party member is seen at 0 HP.
        roster = state.get("party", [])
        if any(member.get("hp_current", 1) == 0 for member in roster):
            self.any_pokemon_fainted = True

        # Example session length
        steps_taken = state.get("step_count", 0)
        session_complete = steps_taken >= 100 and not self.any_pokemon_fainted
        if not session_complete:
            return 0.0

        self.perfect_day_achieved = True
        return 100.0
@@ -0,0 +1,137 @@
1
+ """
2
+ Economy & Resource Management Reward Components
3
+
4
+ Rewards for money management and inventory optimization.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any, Set
9
+
10
+
11
class FirstEarningsReward(RewardComponent):
    """Award 10 points, once, the first time money rises from zero."""

    def __init__(self):
        self.first_earnings = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 when money increases from a previous balance of 0, else 0.0.

        ``action["prev_money"]`` and ``state["money"]`` both default to 0.
        """
        if self.first_earnings:
            return 0.0

        balance_before = action.get("prev_money", 0)
        balance_now = state.get("money", 0)

        earned_from_nothing = balance_now > balance_before and balance_before == 0
        if not earned_from_nothing:
            return 0.0

        self.first_earnings = True
        return 10.0
28
+
29
+
30
class WealthMilestonesReward(RewardComponent):
    """Award 25 points for each money milestone the first time it is reached."""

    def __init__(self):
        self.milestones = [1000, 5000, 10000, 50000]
        # Milestones that have already paid out.
        self.milestones_reached: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 per milestone newly met by ``state["money"]``.

        Several milestones can be crossed in one step; their rewards add up.
        """
        balance = state.get("money", 0)
        payout = 0.0

        for threshold in self.milestones:
            newly_met = balance >= threshold and threshold not in self.milestones_reached
            if not newly_met:
                continue
            self.milestones_reached.add(threshold)
            payout += 25.0

        return payout
47
+
48
+
49
class SmartPurchasesReward(RewardComponent):
    """Award 10 points when money drops and a useful item ID newly appears in the bag."""

    def __init__(self):
        self.useful_items = {4, 5, 6, 10, 11, 12}  # Pokeballs, Potions

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 if a purchase added a useful item ID, else 0.0.

        Only item IDs absent from the previous inventory count as new; buying
        more of an item already held is not detected.
        """
        prev_inventory = action.get("prev_inventory", [])
        current_inventory = state.get("inventory", [])
        prev_money = action.get("prev_money", 0)
        current_money = state.get("money", 0)

        # A purchase is inferred from money going down.
        spent_money = current_money < prev_money
        if not spent_money:
            return 0.0

        ids_before = {entry.get("item_id") for entry in prev_inventory}
        ids_now = {entry.get("item_id") for entry in current_inventory}
        acquired = ids_now - ids_before

        if any(item_id in self.useful_items for item_id in acquired):
            return 10.0
        return 0.0
71
+
72
+
73
class RarePurchaseReward(RewardComponent):
    """Award 20 points when money drops by over 1000 and an expensive item ID newly appears."""

    def __init__(self):
        self.expensive_items = {50, 51, 52}  # TMs, evolution stones

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 if a large purchase added an expensive item ID, else 0.0.

        Only item IDs absent from the previous inventory count as new.
        """
        prev_inventory = action.get("prev_inventory", [])
        current_inventory = state.get("inventory", [])
        prev_money = action.get("prev_money", 0)
        current_money = state.get("money", 0)

        # An expensive purchase is inferred from money falling by more than 1000.
        spent_heavily = current_money < prev_money - 1000
        if not spent_heavily:
            return 0.0

        ids_before = {entry.get("item_id") for entry in prev_inventory}
        ids_now = {entry.get("item_id") for entry in current_inventory}
        acquired = ids_now - ids_before

        if any(item_id in self.expensive_items for item_id in acquired):
            return 20.0
        return 0.0
95
+
96
+
97
class InventoryOrganizationReward(RewardComponent):
    """Placeholder for a bag-management reward (intended value: +5 points).

    Inventory management tracking is not implemented, so ``score`` always
    yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
104
+
105
+
106
class HealingItemUsageReward(RewardComponent):
    """Award 3 points when the inventory shrinks while a party member is below half HP.

    The HP check uses the party in the current ``state``. Any reduction in
    the number of inventory entries is treated as an item having been used.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 if an item was used and some Pokemon is under 50% HP, else 0.0."""
        roster = state.get("party", [])
        bag_before = action.get("prev_inventory", [])
        bag_now = state.get("inventory", [])

        # Fewer inventory entries than before means an item was consumed.
        if len(bag_now) >= len(bag_before):
            return 0.0

        for member in roster:
            # Clamp the denominator to 1 to avoid dividing by zero.
            max_hp = max(member.get("hp_max", 1), 1)
            hp_fraction = member.get("hp_current", 0) / max_hp
            if hp_fraction < 0.5:  # Low HP
                return 3.0
        return 0.0
122
+
123
+
124
class PokeballEfficiencyReward(RewardComponent):
    """Award 5 points when the party grows while the inventory shrinks.

    That pair of changes is taken to mean a Pokeball was used for a capture.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 if party size went up and inventory entry count went down, else 0.0."""
        party_size_before = len(action.get("prev_party", []))
        party_size_now = len(state.get("party", []))
        bag_before = action.get("prev_inventory", [])
        bag_now = state.get("inventory", [])

        party_grew = party_size_now > party_size_before
        captured_with_ball = party_grew and len(bag_now) < len(bag_before)
        return 5.0 if captured_with_ball else 0.0
@@ -0,0 +1,56 @@
1
+ """
2
+ Efficiency & Optimization Reward Components
3
+
4
+ Rewards for optimal play, routing, and game knowledge.
5
+ """
6
+
7
+ from synth_ai.environments.environment.rewards.core import RewardComponent
8
+ from typing import Dict, Any
9
+
10
+
11
class FastTravelUsageReward(RewardComponent):
    """Placeholder for an effective-Fly-usage reward (intended value: +10 points).

    Fly detection is not implemented, so ``score`` always yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
17
+
18
+
19
class OptimalRoutingReward(RewardComponent):
    """Placeholder for an efficient-pathing reward (intended value: +15 points).

    Path analysis is not implemented, so ``score`` always yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
25
+
26
+
27
class PuzzleSolvingReward(RewardComponent):
    """Placeholder for a quick-puzzle-solving reward (intended value: +25 points).

    Puzzle completion detection is not implemented, so ``score`` always
    yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
33
+
34
+
35
class MoveEffectivenessReward(RewardComponent):
    """Placeholder for a consistent-type-advantage reward (intended value: +8 points).

    Type effectiveness tracking is not implemented, so ``score`` always
    yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
41
+
42
+
43
class EvolutionTimingReward(RewardComponent):
    """Placeholder for an optimal-evolution-timing reward (intended value: +15 points).

    Evolution timing analysis is not implemented, so ``score`` always
    yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward
49
+
50
+
51
class HMUsageReward(RewardComponent):
    """Placeholder for an appropriate-HM-usage reward (intended value: +10 points).

    HM usage detection is not implemented, so ``score`` always yields 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally; ``state`` and ``action`` are not inspected."""
        no_reward = 0.0
        return no_reward