synth-ai 0.2.4.dev3__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +575 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. synth_ai/tracing_v3/examples/basic_usage.py +188 -0
  100. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
  101. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +105 -6
  102. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
  103. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
  104. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
  105. {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,337 @@
1
+ """NetHack achievements and milestones tracking."""
2
+
3
+ import json
4
+ import os
5
+ from typing import Dict, Any, Optional, Tuple
6
+ from dataclasses import dataclass, field
7
+
8
+
9
+ # Exact copy of Balrog's Progress class
10
class Progress:
    """Track NetHack progression against a table of achievement point values.

    The table is read from a JSON file and indexed under its ``"3.4.3"`` key.
    ``update`` looks names up in the table's ``"dungeons"`` and
    ``"experience_levels"`` mappings and remembers the highest value seen in
    each.

    Attributes:
        achievements: The parsed ``"3.4.3"`` section of the achievements file.
        dungeon_progression: Highest dungeon point value recorded so far.
        experience_progression: Highest experience point value recorded so far.
        ascension: True once ``update`` has been called with ``"ascension"``.
    """

    def __init__(self, achievements_path=None):
        """Load the achievements table and reset all progression counters.

        Args:
            achievements_path: Path to the achievements JSON file. When None,
                ``helpers/achievements.json`` next to this module is used.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If the file has no ``"3.4.3"`` section.
        """
        if achievements_path is None:
            achievements_path = os.path.join(
                os.path.dirname(__file__), "helpers", "achievements.json"
            )

        # JSON text is UTF-8; pin the encoding so the table is decoded the
        # same way regardless of the platform's locale default.
        with open(achievements_path, "r", encoding="utf-8") as f:
            self.achievements = json.load(f)["3.4.3"]

        self.dungeon_progression = 0
        self.experience_progression = 0
        self.ascension = False

    def update(self, dungeon_name, experience_level):
        """Update progression based on current dungeon and experience level.

        Args:
            dungeon_name: Key into the table's ``"dungeons"`` mapping, or the
                special value ``"ascension"``.
            experience_level: Experience level; looked up as
                ``f"lvl{experience_level}"`` in ``"experience_levels"``.

        Returns:
            A list of human-readable strings, one per progression value that
            increased during this call (empty when nothing changed).
        """
        achievements_unlocked = []

        # Ascension short-circuits everything else and is reported only once.
        if dungeon_name == "ascension":
            if not self.ascension:
                achievements_unlocked.append("ascension (100 points)")
            self.ascension = True
            return achievements_unlocked

        # Update dungeon progression (names missing from the table are ignored)
        dungeons = self.achievements["dungeons"]
        if dungeon_name in dungeons:
            new_progression = dungeons[dungeon_name]
            if new_progression > self.dungeon_progression:
                old_score = self.dungeon_progression
                self.dungeon_progression = new_progression
                achievements_unlocked.append(
                    f"dungeon {dungeon_name} ({old_score} -> {new_progression} points)"
                )

        # Update experience progression (levels missing from the table are ignored)
        exp_key = f"lvl{experience_level}"
        experience_levels = self.achievements["experience_levels"]
        if exp_key in experience_levels:
            new_progression = experience_levels[exp_key]
            if new_progression > self.experience_progression:
                old_score = self.experience_progression
                self.experience_progression = new_progression
                achievements_unlocked.append(
                    f"experience {exp_key} ({old_score} -> {new_progression} points)"
                )

        return achievements_unlocked

    @property
    def percent(self):
        """Return the progression score.

        This is 100.0 after ascension, otherwise the larger of the dungeon
        and experience progression values taken from the table.
        """
        if self.ascension:
            return 100.0
        return max(self.dungeon_progression, self.experience_progression)
63
+
64
+
65
@dataclass
class NetHackAchievements:
    """Track player achievements and milestones in NetHack.

    Counters are plain dataclass fields. ``update_from_observation`` maintains
    the subset that can be derived from an observation (depth, level, max HP,
    gold, turns, kills); the remaining fields are only stored here and must be
    updated by the caller.

    Creating an instance with defaults also constructs a ``Progress``, which
    loads its achievements JSON file from disk.
    """

    # Exploration achievements
    depth_reached: int = 1
    rooms_explored: int = 0
    secret_doors_found: int = 0
    stairs_down_found: int = 0
    stairs_up_found: int = 0

    # Combat achievements
    monsters_killed: int = 0
    peaceful_monsters_killed: int = 0
    unique_monsters_killed: int = 0
    kills_by_magic: int = 0
    kills_by_melee: int = 0
    kills_by_ranged: int = 0

    # Item achievements
    items_picked_up: int = 0
    gold_collected: int = 0  # high-water mark of gold seen in observations
    scrolls_read: int = 0
    potions_drunk: int = 0
    spells_cast: int = 0
    artifacts_found: int = 0

    # Status achievements
    max_level_reached: int = 1
    max_hp_reached: int = 0
    times_prayed: int = 0
    successful_prayers: int = 0
    times_polymorphed: int = 0

    # Special achievements (boolean flags)
    first_kill: bool = False
    first_spell_cast: bool = False
    first_prayer: bool = False
    reached_minetown: bool = False
    reached_mines_end: bool = False
    reached_castle: bool = False
    got_quest: bool = False
    completed_quest: bool = False

    # Survival achievements
    turns_survived: int = 0
    turns_without_damage: int = 0
    traps_triggered: int = 0
    traps_avoided: int = 0

    # Negative achievements (for tracking mistakes)
    times_died: int = 0
    pets_killed: int = 0
    shopkeepers_angered: int = 0

    # Balrog progress tracker
    balrog_progress: Progress = field(default_factory=Progress)

    def to_dict(self) -> Dict[str, Any]:
        """Convert a summary of the achievements to a dictionary.

        Only a subset of the fields is exported; up and down stairs are
        merged into a single ``"stairs_found"`` count.
        """
        return {
            # Exploration
            "depth_reached": self.depth_reached,
            "rooms_explored": self.rooms_explored,
            "secret_doors_found": self.secret_doors_found,
            "stairs_found": self.stairs_down_found + self.stairs_up_found,
            # Combat
            "monsters_killed": self.monsters_killed,
            "unique_monsters_killed": self.unique_monsters_killed,
            "kills_by_magic": self.kills_by_magic,
            # Items
            "items_collected": self.items_picked_up,
            "gold_collected": self.gold_collected,
            "artifacts_found": self.artifacts_found,
            # Status
            "max_level": self.max_level_reached,
            "max_hp": self.max_hp_reached,
            "successful_prayers": self.successful_prayers,
            # Special (as booleans)
            "first_kill": self.first_kill,
            "first_spell_cast": self.first_spell_cast,
            "reached_minetown": self.reached_minetown,
            "got_quest": self.got_quest,
            # Survival
            "turns_survived": self.turns_survived,
            "traps_avoided": self.traps_avoided,
            # Balrog score
            "balrog_score": self.balrog_progress.percent,
        }

    def get_unlocked_achievements(self) -> Dict[str, bool]:
        """Get dictionary of which achievements have been unlocked.

        Every milestone name is always present; the value says whether the
        current counters satisfy it.
        """
        return {
            # Depth milestones
            "reached_dlvl_2": self.depth_reached >= 2,
            "reached_dlvl_5": self.depth_reached >= 5,
            "reached_dlvl_10": self.depth_reached >= 10,
            "reached_dlvl_20": self.depth_reached >= 20,
            # Kill milestones
            "first_kill": self.first_kill,
            "killed_10_monsters": self.monsters_killed >= 10,
            "killed_50_monsters": self.monsters_killed >= 50,
            "killed_100_monsters": self.monsters_killed >= 100,
            "killed_by_magic": self.kills_by_magic > 0,
            # Item milestones
            "collected_100_gold": self.gold_collected >= 100,
            "collected_1000_gold": self.gold_collected >= 1000,
            "collected_10000_gold": self.gold_collected >= 10000,
            "found_artifact": self.artifacts_found > 0,
            # Level milestones
            "reached_level_5": self.max_level_reached >= 5,
            "reached_level_10": self.max_level_reached >= 10,
            "reached_level_20": self.max_level_reached >= 20,
            # Special locations
            "reached_minetown": self.reached_minetown,
            "reached_mines_end": self.reached_mines_end,
            "reached_castle": self.reached_castle,
            # Quest milestones
            "got_quest": self.got_quest,
            "completed_quest": self.completed_quest,
            # Survival milestones
            "survived_100_turns": self.turns_survived >= 100,
            "survived_1000_turns": self.turns_survived >= 1000,
            "survived_10000_turns": self.turns_survived >= 10000,
            # Prayer milestones
            "first_prayer": self.first_prayer,
            "successful_prayer": self.successful_prayers > 0,
            # Exploration milestones
            "found_secret_door": self.secret_doors_found > 0,
            "explored_10_rooms": self.rooms_explored >= 10,
            "explored_50_rooms": self.rooms_explored >= 50,
        }

    def update_from_observation(
        self, obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, bool]:
        """Update achievements from an observation and report what is new.

        Args:
            obs: Current observation. Must contain ``"player_stats"`` with
                ``"depth"``, ``"experience_level"``, ``"max_hp"`` and
                ``"gold"``; ``"turn"`` inside it and a top-level
                ``"message"`` (str or bytes) are optional.
            prev_obs: Previous observation. Only its truthiness is used:
                kill messages are parsed only when it is provided.

        Returns:
            Mapping of newly unlocked achievement name to True. Balrog
            progression changes are included under a ``"balrog_"`` prefix.

        Raises:
            KeyError: If ``"player_stats"`` or a required stat is missing.
        """
        newly_unlocked = {}
        old_unlocked = self.get_unlocked_achievements()

        # Update basic stats from player_stats - require it to exist
        stats = obs["player_stats"]

        # Depth, level and max HP are tracked as running maxima.
        current_depth = stats["depth"]
        if current_depth > self.depth_reached:
            self.depth_reached = current_depth

        current_level = stats["experience_level"]
        if current_level > self.max_level_reached:
            self.max_level_reached = current_level

        current_hp = stats["max_hp"]
        if current_hp > self.max_hp_reached:
            self.max_hp_reached = current_hp

        # Gold on hand can go down again; keep the high-water mark (like the
        # stats above) so a gold milestone never re-locks and then gets
        # reported as newly unlocked a second time.
        self.gold_collected = max(self.gold_collected, stats["gold"])

        # Update turn count (if available)
        if "turn" in stats:
            self.turns_survived = stats["turn"]

        # Update Balrog progress
        # Map depth to dungeon name (simplified version)
        dungeon_name = self._get_dungeon_name(current_depth)
        balrog_achievements = self.balrog_progress.update(dungeon_name, current_level)

        # Track balrog achievements as newly unlocked
        for balrog_achievement in balrog_achievements:
            newly_unlocked[f"balrog_{balrog_achievement}"] = True

        # Check for kills (would need to parse messages or track HP changes)
        if prev_obs and "message" in obs:
            message = obs["message"]
            if isinstance(message, bytes):
                message = message.decode("ascii", errors="ignore").strip("\x00")
            # A non-text message (e.g. None) cannot announce a kill; skip it
            # instead of failing on the substring checks.
            if isinstance(message, str) and (
                "You kill" in message or "dies!" in message
            ):
                self.monsters_killed += 1
                self.first_kill = True

                # Check kill type
                if "magic missile" in message or "spell" in message:
                    self.kills_by_magic += 1

        # Check for new achievements
        new_unlocked = self.get_unlocked_achievements()
        for achievement, unlocked in new_unlocked.items():
            if unlocked and not old_unlocked.get(achievement, False):
                newly_unlocked[achievement] = True

        return newly_unlocked

    def _get_dungeon_name(self, depth: int) -> str:
        """Map depth to dungeon name for Balrog progress tracking.

        Returns the name of the deepest bucket whose threshold ``depth`` has
        reached, or ``"dlvl1"`` when it is below every threshold.
        """
        # Simplified mapping - in real implementation would need more game state
        # NOTE(review): there is no bucket between 10 and 30, so depths 20-29
        # map to "dlvl10" - confirm against the achievements table.
        buckets = (
            (50, "dlvl50"),
            (40, "dlvl40"),
            (30, "dlvl30"),
            (10, "dlvl10"),
            (5, "dlvl5"),
        )
        for threshold, name in buckets:
            if depth >= threshold:
                return name
        return "dlvl1"
276
+
277
+
278
def calculate_balrog_reward(
    obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
) -> float:
    """Calculate a shaped step reward from two consecutive observations.

    Despite the name, this does not compute the Balrog progression score.
    For compatibility with existing code it sums simple deltas between
    ``prev_obs`` and ``obs``; the actual Balrog score is tracked in
    NetHackAchievements.balrog_progress.

    Components (only positive deltas are rewarded):
        * score delta / 100
        * gold delta / 1000
        * experience point delta / 100
        * depth delta * 10
        * experience level delta * 5
        * -100 when the episode is done and the player died
        * -0.1 when ``"hunger"`` is present and above 500

    Args:
        obs: Current observation. ``"player_stats"`` must contain
            ``"score"``, ``"gold"``, ``"experience_points"``, ``"depth"``
            and ``"experience_level"``; ``"hp"`` is read when the episode
            is done and the message does not mention death. ``"done"`` and
            ``"message"`` are optional top-level keys.
        prev_obs: Previous observation with the same ``"player_stats"``
            keys. When missing or empty the reward is 0.0.

    Returns:
        The summed reward.

    Raises:
        KeyError: If ``"player_stats"`` or a required stat is missing.
    """
    reward = 0.0

    if not prev_obs:
        return reward

    # Get player stats - require them to exist
    stats = obs["player_stats"]
    prev_stats = prev_obs["player_stats"]

    # Score delta
    score_delta = stats["score"] - prev_stats["score"]
    if score_delta > 0:
        reward += score_delta / 100.0  # Scale down large score changes

    # Gold delta
    gold_delta = stats["gold"] - prev_stats["gold"]
    if gold_delta > 0:
        reward += gold_delta / 1000.0  # Small reward for gold

    # Experience delta
    exp_delta = stats["experience_points"] - prev_stats["experience_points"]
    if exp_delta > 0:
        reward += exp_delta / 100.0

    # Depth progress: 10.0 per dungeon level descended in this step
    depth_delta = stats["depth"] - prev_stats["depth"]
    if depth_delta > 0:
        reward += depth_delta * 10.0  # Big reward for going deeper

    # Level up bonus
    level_delta = stats["experience_level"] - prev_stats["experience_level"]
    if level_delta > 0:
        reward += level_delta * 5.0

    # Death penalty
    if obs.get("done"):
        message = obs.get("message", b"")
        if isinstance(message, bytes):
            message = message.decode("ascii", errors="ignore")
        elif not isinstance(message, str):
            # None or another non-text payload: there is nothing to search,
            # so fall through to the HP check instead of raising.
            message = ""
        if "died" in message.lower() or stats["hp"] <= 0:
            reward -= 100.0  # Large death penalty

    # Hunger penalty (if very hungry)
    # NOTE(review): the unit and direction of "hunger" are not visible here;
    # confirm that values above 500 really mean "weak or worse".
    if "hunger" in stats:
        hunger = stats["hunger"]
        if hunger > 500:
            reward -= 0.1

    return reward