synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,337 @@
|
|
1
|
+
"""NetHack achievements and milestones tracking."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
import os
|
5
|
+
from typing import Dict, Any, Optional, Tuple
|
6
|
+
from dataclasses import dataclass, field
|
7
|
+
|
8
|
+
|
9
|
+
# Adapted from Balrog's Progress class (the only change is an explicit file encoding).
class Progress:
    """Track BALROG-style NetHack progression against a points table.

    The table is read from a JSON file whose ``"3.4.3"`` entry must contain
    two mappings, ``"dungeons"`` and ``"experience_levels"``, each from a
    milestone key to a point value. The tracker remembers the best value
    seen in each mapping plus an ascension flag.

    Attributes:
        achievements: The ``"3.4.3"`` section of the loaded JSON file.
        dungeon_progression: Highest dungeon point value reached so far.
        experience_progression: Highest experience point value reached so far.
        ascension: ``True`` once ``update`` has been called with ``"ascension"``.
    """

    def __init__(self, achievements_path: Optional[str] = None) -> None:
        """Load the points table and reset all progression state.

        Args:
            achievements_path: Path to the achievements JSON file. When
                ``None``, ``helpers/achievements.json`` next to this module
                is used.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If the JSON document has no ``"3.4.3"`` entry.
        """
        if achievements_path is None:
            achievements_path = os.path.join(
                os.path.dirname(__file__), "helpers", "achievements.json"
            )

        # JSON is UTF-8 by specification; without an explicit encoding the
        # platform locale is used, which can mis-decode non-ASCII keys.
        with open(achievements_path, "r", encoding="utf-8") as f:
            self.achievements = json.load(f)["3.4.3"]

        self.dungeon_progression = 0
        self.experience_progression = 0
        self.ascension = False

    def update(self, dungeon_name, experience_level):
        """Update progression based on current dungeon and experience level.

        Args:
            dungeon_name: Key into the ``"dungeons"`` table, or the special
                value ``"ascension"``.
            experience_level: Level number; looked up as ``f"lvl{level}"``
                in the ``"experience_levels"`` table.

        Returns:
            A list of human-readable strings, one per milestone whose point
            value improved during this call (empty when nothing improved).
        """
        achievements_unlocked = []

        # Ascension short-circuits everything else and is reported only once.
        if dungeon_name == "ascension":
            if not self.ascension:
                achievements_unlocked.append("ascension (100 points)")
                self.ascension = True
            return achievements_unlocked

        # Update dungeon progression; unknown dungeon names are ignored.
        if dungeon_name in self.achievements["dungeons"]:
            new_progression = self.achievements["dungeons"][dungeon_name]
            if new_progression > self.dungeon_progression:
                old_score = self.dungeon_progression
                self.dungeon_progression = new_progression
                achievements_unlocked.append(
                    f"dungeon {dungeon_name} ({old_score} -> {new_progression} points)"
                )

        # Update experience progression; levels missing from the table are ignored.
        exp_key = f"lvl{experience_level}"
        if exp_key in self.achievements["experience_levels"]:
            new_progression = self.achievements["experience_levels"][exp_key]
            if new_progression > self.experience_progression:
                old_score = self.experience_progression
                self.experience_progression = new_progression
                achievements_unlocked.append(
                    f"experience {exp_key} ({old_score} -> {new_progression} points)"
                )

        return achievements_unlocked

    @property
    def percent(self):
        """Return the BALROG evaluation score (0-100).

        This is ``100.0`` after ascension, otherwise the larger of the
        dungeon and experience progression values.
        """
        if self.ascension:
            return 100.0
        return max(self.dungeon_progression, self.experience_progression)
|
63
|
+
|
64
|
+
|
65
|
+
@dataclass
class NetHackAchievements:
    """Track player achievements and milestones in NetHack.

    Counters and flags are plain dataclass fields. Only a subset of them is
    updated by ``update_from_observation`` (depth, level, max HP, gold, turn
    count, kill counters and the Balrog progress tracker); the remaining
    fields keep their defaults unless code outside this class sets them.
    """

    # Exploration achievements
    depth_reached: int = 1
    rooms_explored: int = 0
    secret_doors_found: int = 0
    stairs_down_found: int = 0
    stairs_up_found: int = 0

    # Combat achievements
    monsters_killed: int = 0
    peaceful_monsters_killed: int = 0
    unique_monsters_killed: int = 0
    kills_by_magic: int = 0
    kills_by_melee: int = 0
    kills_by_ranged: int = 0

    # Item achievements
    items_picked_up: int = 0
    gold_collected: int = 0  # highest gold total observed so far
    scrolls_read: int = 0
    potions_drunk: int = 0
    spells_cast: int = 0
    artifacts_found: int = 0

    # Status achievements
    max_level_reached: int = 1
    max_hp_reached: int = 0
    times_prayed: int = 0
    successful_prayers: int = 0
    times_polymorphed: int = 0

    # Special achievements (boolean flags)
    first_kill: bool = False
    first_spell_cast: bool = False
    first_prayer: bool = False
    reached_minetown: bool = False
    reached_mines_end: bool = False
    reached_castle: bool = False
    got_quest: bool = False
    completed_quest: bool = False

    # Survival achievements
    turns_survived: int = 0
    turns_without_damage: int = 0
    traps_triggered: int = 0
    traps_avoided: int = 0

    # Negative achievements (for tracking mistakes)
    times_died: int = 0
    pets_killed: int = 0
    shopkeepers_angered: int = 0

    # Balrog progress tracker (constructing the default loads its JSON table)
    balrog_progress: Progress = field(default_factory=Progress)

    def to_dict(self) -> Dict[str, Any]:
        """Convert a summary of the achievements to a dictionary.

        Not every field is exported, and some are renamed or combined
        (``stairs_found`` is the sum of both stair counters).
        """
        return {
            # Exploration
            "depth_reached": self.depth_reached,
            "rooms_explored": self.rooms_explored,
            "secret_doors_found": self.secret_doors_found,
            "stairs_found": self.stairs_down_found + self.stairs_up_found,
            # Combat
            "monsters_killed": self.monsters_killed,
            "unique_monsters_killed": self.unique_monsters_killed,
            "kills_by_magic": self.kills_by_magic,
            # Items
            "items_collected": self.items_picked_up,
            "gold_collected": self.gold_collected,
            "artifacts_found": self.artifacts_found,
            # Status
            "max_level": self.max_level_reached,
            "max_hp": self.max_hp_reached,
            "successful_prayers": self.successful_prayers,
            # Special (as booleans)
            "first_kill": self.first_kill,
            "first_spell_cast": self.first_spell_cast,
            "reached_minetown": self.reached_minetown,
            "got_quest": self.got_quest,
            # Survival
            "turns_survived": self.turns_survived,
            "traps_avoided": self.traps_avoided,
            # Balrog score
            "balrog_score": self.balrog_progress.percent,
        }

    def get_unlocked_achievements(self) -> Dict[str, bool]:
        """Get dictionary of which achievements have been unlocked.

        Every milestone is derived from the current field values, so the
        result is a snapshot: a milestone is ``True`` exactly when its
        threshold is met right now.
        """
        return {
            # Depth milestones
            "reached_dlvl_2": self.depth_reached >= 2,
            "reached_dlvl_5": self.depth_reached >= 5,
            "reached_dlvl_10": self.depth_reached >= 10,
            "reached_dlvl_20": self.depth_reached >= 20,
            # Kill milestones
            "first_kill": self.first_kill,
            "killed_10_monsters": self.monsters_killed >= 10,
            "killed_50_monsters": self.monsters_killed >= 50,
            "killed_100_monsters": self.monsters_killed >= 100,
            "killed_by_magic": self.kills_by_magic > 0,
            # Item milestones
            "collected_100_gold": self.gold_collected >= 100,
            "collected_1000_gold": self.gold_collected >= 1000,
            "collected_10000_gold": self.gold_collected >= 10000,
            "found_artifact": self.artifacts_found > 0,
            # Level milestones
            "reached_level_5": self.max_level_reached >= 5,
            "reached_level_10": self.max_level_reached >= 10,
            "reached_level_20": self.max_level_reached >= 20,
            # Special locations
            "reached_minetown": self.reached_minetown,
            "reached_mines_end": self.reached_mines_end,
            "reached_castle": self.reached_castle,
            # Quest milestones
            "got_quest": self.got_quest,
            "completed_quest": self.completed_quest,
            # Survival milestones
            "survived_100_turns": self.turns_survived >= 100,
            "survived_1000_turns": self.turns_survived >= 1000,
            "survived_10000_turns": self.turns_survived >= 10000,
            # Prayer milestones
            "first_prayer": self.first_prayer,
            "successful_prayer": self.successful_prayers > 0,
            # Exploration milestones
            "found_secret_door": self.secret_doors_found > 0,
            "explored_10_rooms": self.rooms_explored >= 10,
            "explored_50_rooms": self.rooms_explored >= 50,
        }

    def update_from_observation(
        self, obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
    ) -> Dict[str, bool]:
        """Update achievements based on an observation.

        Args:
            obs: Current observation. Must contain ``"player_stats"`` with
                the keys ``"depth"``, ``"experience_level"``, ``"max_hp"``
                and ``"gold"``; ``"turn"`` is used when present. An optional
                ``"message"`` (``str`` or ``bytes``) is scanned for kills.
            prev_obs: Previous observation. Its contents are not read; kill
                detection is simply skipped when it is missing or empty.

        Returns:
            Mapping of newly unlocked achievement names to ``True``. Balrog
            progress entries come first, prefixed with ``"balrog_"``.

        Raises:
            KeyError: If a required key is missing from ``obs``.
        """
        newly_unlocked = {}
        old_unlocked = self.get_unlocked_achievements()

        # Update basic stats from player_stats - require it to exist
        stats = obs["player_stats"]

        # Depth, level and max HP are tracked as running maxima.
        current_depth = stats["depth"]
        self.depth_reached = max(self.depth_reached, current_depth)

        current_level = stats["experience_level"]
        self.max_level_reached = max(self.max_level_reached, current_level)

        self.max_hp_reached = max(self.max_hp_reached, stats["max_hp"])

        # Track the peak gold total as well. Storing the current amount let
        # a gold milestone revert to locked after spending and then be
        # reported as "newly unlocked" a second time.
        self.gold_collected = max(self.gold_collected, stats["gold"])

        # Update turn count (if available)
        if "turn" in stats:
            self.turns_survived = stats["turn"]

        # Update Balrog progress
        # Map depth to dungeon name (simplified version)
        dungeon_name = self._get_dungeon_name(current_depth)
        balrog_achievements = self.balrog_progress.update(dungeon_name, current_level)

        # Track balrog achievements as newly unlocked
        for balrog_achievement in balrog_achievements:
            newly_unlocked[f"balrog_{balrog_achievement}"] = True

        # Check for kills (would need to parse messages or track HP changes)
        if prev_obs and "message" in obs:
            message = obs["message"]
            if isinstance(message, bytes):
                message = message.decode("ascii", errors="ignore").strip("\x00")
            if "You kill" in message or "dies!" in message:
                self.monsters_killed += 1
                self.first_kill = True

                # Check kill type
                if "magic missile" in message or "spell" in message:
                    self.kills_by_magic += 1

        # Anything unlocked now that was not unlocked on entry is new.
        new_unlocked = self.get_unlocked_achievements()
        for achievement, unlocked in new_unlocked.items():
            if unlocked and not old_unlocked.get(achievement, False):
                newly_unlocked[achievement] = True

        return newly_unlocked

    def _get_dungeon_name(self, depth: int) -> str:
        """Map depth to dungeon name for Balrog progress tracking.

        Returns the key for the deepest threshold in (50, 40, 30, 10, 5)
        that ``depth`` has reached, or ``"dlvl1"`` below 5.
        """
        # Simplified mapping - in real implementation would need more game state
        # NOTE(review): there is no 20 threshold here although a
        # "reached_dlvl_20" milestone exists - confirm against the keys in
        # the achievements table.
        if depth >= 50:
            return "dlvl50"
        elif depth >= 40:
            return "dlvl40"
        elif depth >= 30:
            return "dlvl30"
        elif depth >= 10:
            return "dlvl10"
        elif depth >= 5:
            return "dlvl5"
        else:
            return "dlvl1"
|
276
|
+
|
277
|
+
|
278
|
+
def calculate_balrog_reward(
    obs: Dict[str, Any], prev_obs: Optional[Dict[str, Any]] = None
) -> float:
    """Calculate a shaped step reward from the change between two observations.

    Despite the name, this does not compute the Balrog progression score
    (that is tracked in ``NetHackAchievements.balrog_progress``). For
    compatibility with existing code it sums simple positive deltas:

    * score increase / 100
    * gold increase / 1000
    * experience point increase / 100
    * 10 per dungeon level descended
    * 5 per experience level gained
    * -100 when the episode is done and the player died
    * -0.1 when ``player_stats["hunger"]`` exceeds 500

    Args:
        obs: Current observation. Must contain ``"player_stats"`` with the
            keys ``"score"``, ``"gold"``, ``"experience_points"``,
            ``"depth"`` and ``"experience_level"``; ``"hunger"`` is
            optional. ``"hp"`` is read when ``obs["done"]`` is truthy and
            the message does not already indicate death.
        prev_obs: Previous observation with the same ``"player_stats"``
            keys. When missing or empty the reward is ``0.0``.

    Returns:
        The reward for this step.

    Raises:
        KeyError: If a required key is missing from either observation.
    """
    reward = 0.0

    if not prev_obs:
        return reward

    # Get player stats - require them to exist
    stats = obs["player_stats"]
    prev_stats = prev_obs["player_stats"]

    # Only improvements are rewarded; negative deltas contribute nothing.
    score_delta = stats["score"] - prev_stats["score"]
    if score_delta > 0:
        reward += score_delta / 100.0  # Scale down large score changes

    gold_delta = stats["gold"] - prev_stats["gold"]
    if gold_delta > 0:
        reward += gold_delta / 1000.0  # Small reward for gold

    exp_delta = stats["experience_points"] - prev_stats["experience_points"]
    if exp_delta > 0:
        reward += exp_delta / 100.0

    depth_delta = stats["depth"] - prev_stats["depth"]
    if depth_delta > 0:
        reward += depth_delta * 10.0  # Big reward for going deeper

    level_delta = stats["experience_level"] - prev_stats["experience_level"]
    if level_delta > 0:
        reward += level_delta * 5.0

    # Death penalty
    if obs.get("done"):
        message = obs.get("message", b"")
        if message is None:
            # A terminal observation may carry no message; fall through to
            # the HP check instead of failing on ``None.lower()``.
            message = b""
        if isinstance(message, bytes):
            message = message.decode("ascii", errors="ignore")
        if "died" in message.lower() or stats["hp"] <= 0:
            reward -= 100.0  # Large death penalty

    # Hunger penalty
    # NOTE(review): the original comment reads "Weak or worse", but whether
    # "hunger" is a nutrition counter or a small status code is not visible
    # here - confirm that "> 500" matches the intended states.
    if "hunger" in stats:
        hunger = stats["hunger"]
        if hunger > 500:
            reward -= 0.1

    return reward
|