synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +575 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
  100. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +104 -6
  101. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
  102. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
  103. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
  104. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,583 @@
1
+ """MiniGrid TaskSet implementation.
2
+
3
+ This module provides task generation and management for MiniGrid environments,
4
+ including procedural generation and task categorization.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import random
11
+ from dataclasses import dataclass, field
12
+ from typing import Any, Dict, List, Optional, Set, Tuple
13
+ from uuid import uuid4
14
+
15
+ from synth_ai.environments.tasks.api import (
16
+ Task,
17
+ TaskInstance,
18
+ TaskInstanceMetadata,
19
+ TaskInstanceSet,
20
+ SplitInfo,
21
+ Impetus,
22
+ Intent,
23
+ )
24
+ from synth_ai.environments.examples.minigrid.environment_mapping import (
25
+ get_environment_from_seed,
26
+ get_difficulty_from_seed,
27
+ get_all_environments,
28
+ )
29
+ from synth_ai.environments.examples.minigrid.puzzle_loader import (
30
+ get_puzzle_loader,
31
+ get_puzzle_by_seed,
32
+ MiniGridPuzzle,
33
+ )
34
+
35
+
36
@dataclass
class MiniGridTaskInstanceMetadata(TaskInstanceMetadata):
    """Descriptive attributes attached to one MiniGrid task instance.

    The values are either passed in directly or, via :meth:`from_seed`,
    inferred heuristically from substrings of the environment id.
    """

    env_name: str
    grid_size: Tuple[int, int]
    difficulty: str  # "ultra-easy", "easy", "medium", "hard", "ultra-hard", "specialized"
    has_key: bool = False
    has_door: bool = False
    has_lava: bool = False
    num_objects: int = 0
    optimal_path_length: Optional[int] = None
    seed: Optional[int] = None

    @classmethod
    def from_seed(cls, seed: int) -> "MiniGridTaskInstanceMetadata":
        """Build metadata for the environment that ``seed`` maps to.

        The environment id and difficulty come from the environment-mapping
        helpers; everything else is estimated from the id's text.

        Args:
            seed: Seed forwarded to the environment-mapping helpers and stored
                on the returned metadata.

        Returns:
            Metadata with ``optimal_path_length`` left at its default.
        """
        env_name = get_environment_from_seed(seed)
        difficulty = get_difficulty_from_seed(seed)

        # Ordered rules: the first entry whose substrings ALL occur in the
        # environment id decides the (approximate) grid size.
        size_rules = (
            (("5x5",), (5, 5)),
            (("6x6",), (6, 6)),
            (("8x8",), (8, 8)),
            (("16x16",), (16, 16)),
            (("FourRooms",), (19, 19)),
            (("MultiRoom-N2",), (15, 15)),
            (("MultiRoom-N4",), (19, 19)),
            (("MultiRoom-N6",), (25, 25)),
            (("LavaGapS5",), (5, 7)),
            (("LavaGapS6",), (6, 8)),
            (("LavaGapS7",), (7, 9)),
            (("Crossing", "S9"), (9, 9)),
            (("Crossing", "S11"), (11, 11)),
        )
        grid_size = next(
            (
                size
                for required, size in size_rules
                if all(token in env_name for token in required)
            ),
            (5, 5),  # fallback when no rule matches
        )

        # Feature flags inferred from the environment id.
        has_key = any(tag in env_name for tag in ("DoorKey", "Unlock", "KeyCorridor"))
        has_door = any(tag in env_name for tag in ("Door", "Room", "Unlock"))
        has_lava = "Lava" in env_name

        # Rough object count: one each for key, door and a pickup target,
        # plus the N2/N3 object count of Fetch environments.
        num_objects = int(has_key) + int(has_door) + int("Pickup" in env_name)
        if "Fetch" in env_name:
            if "N2" in env_name:
                num_objects += 2
            elif "N3" in env_name:
                num_objects += 3

        return cls(
            env_name=env_name,
            grid_size=grid_size,
            difficulty=difficulty,
            has_key=has_key,
            has_door=has_door,
            has_lava=has_lava,
            num_objects=num_objects,
            seed=seed,
        )
114
+
115
+
116
@dataclass
class MiniGridTaskInstance(TaskInstance):
    """A specific MiniGrid task instance with dict (de)serialization."""

    async def serialize(self) -> dict:
        """Serialize the task instance to a plain dict.

        The id is written as a string and ``grid_size`` as a list so the
        result contains only basic container and scalar types for those
        fields. ``initial_engine_snapshot`` is not included.

        Returns:
            Dict with ``id``, ``impetus``, ``intent``, ``metadata`` and
            ``is_reproducible`` keys.
        """
        return {
            "id": str(self.id),
            "impetus": {
                "instructions": self.impetus.instructions,
            },
            "intent": {
                "rubric": self.intent.rubric,
                "gold_trajectories": self.intent.gold_trajectories,
                "gold_state_diff": self.intent.gold_state_diff,
            },
            "metadata": {
                "env_name": self.metadata.env_name,
                "grid_size": list(self.metadata.grid_size),
                "difficulty": self.metadata.difficulty,
                "has_key": self.metadata.has_key,
                "has_door": self.metadata.has_door,
                "has_lava": self.metadata.has_lava,
                "num_objects": self.metadata.num_objects,
                "optimal_path_length": self.metadata.optimal_path_length,
                "seed": self.metadata.seed,
            },
            "is_reproducible": self.is_reproducible,
        }

    @classmethod
    async def deserialize(cls, data: dict) -> "MiniGridTaskInstance":
        """Rebuild a task instance from a dict produced by :meth:`serialize`.

        Args:
            data: Mapping with ``impetus``, ``intent`` and ``metadata``
                sub-dicts. ``id``, ``is_reproducible``,
                ``initial_engine_snapshot`` and the optional metadata fields
                fall back to defaults when absent.

        Returns:
            The reconstructed instance. A string id that parses as a UUID is
            converted back to ``UUID`` so a serialize/deserialize round trip
            yields the same id type that fresh instances get from ``uuid4()``;
            any other id value is kept unchanged.

        Raises:
            KeyError: If a required key is missing from ``data``.
        """
        # Local import: the module header only brings in ``uuid4``.
        from uuid import UUID

        if "id" not in data:
            instance_id = uuid4()
        else:
            instance_id = data["id"]
            if isinstance(instance_id, str):
                try:
                    instance_id = UUID(instance_id)
                except ValueError:
                    # Not a UUID-formatted string; keep the caller's id as is.
                    pass

        impetus_data = data["impetus"]
        intent_data = data["intent"]
        meta = data["metadata"]

        return cls(
            id=instance_id,
            impetus=Impetus(
                instructions=impetus_data["instructions"],
            ),
            intent=Intent(
                rubric=intent_data["rubric"],
                gold_trajectories=intent_data.get("gold_trajectories"),
                gold_state_diff=intent_data.get("gold_state_diff", {}),
            ),
            metadata=MiniGridTaskInstanceMetadata(
                env_name=meta["env_name"],
                grid_size=tuple(meta["grid_size"]),  # stored as a list
                difficulty=meta["difficulty"],
                has_key=meta.get("has_key", False),
                has_door=meta.get("has_door", False),
                has_lava=meta.get("has_lava", False),
                num_objects=meta.get("num_objects", 0),
                optimal_path_length=meta.get("optimal_path_length"),
                seed=meta.get("seed"),
            ),
            is_reproducible=data.get("is_reproducible", True),
            initial_engine_snapshot=data.get("initial_engine_snapshot", None),
        )
173
+
174
+
175
# Predefined environment configurations, keyed by difficulty label.
# Each entry pairs an environment id with the grid-size tuple that
# create_minigrid_taskset copies into the task metadata and instructions.
ENVIRONMENTS: Dict[str, List[Tuple[str, Tuple[int, int]]]] = {
    # Open grids and the four-rooms layout.
    "easy": [
        ("MiniGrid-Empty-5x5-v0", (5, 5)),
        ("MiniGrid-Empty-6x6-v0", (6, 6)),
        ("MiniGrid-Empty-8x8-v0", (8, 8)),
        ("MiniGrid-FourRooms-v0", (19, 19)),
    ],
    # Small key/door and unlock tasks.
    "medium": [
        ("MiniGrid-DoorKey-5x5-v0", (5, 5)),
        ("MiniGrid-DoorKey-6x6-v0", (6, 6)),
        ("MiniGrid-DoorKey-8x8-v0", (8, 8)),
        ("MiniGrid-Unlock-v0", (8, 8)),
        ("MiniGrid-UnlockPickup-v0", (8, 8)),
    ],
    # Large key/door grid, multi-room mazes and lava layouts.
    "hard": [
        ("MiniGrid-DoorKey-16x16-v0", (16, 16)),
        ("MiniGrid-MultiRoom-N2-S4-v0", (15, 15)),
        ("MiniGrid-MultiRoom-N4-S5-v0", (19, 19)),
        ("MiniGrid-MultiRoom-N6-v0", (25, 25)),
        ("MiniGrid-LavaGapS5-v0", (5, 7)),
        ("MiniGrid-LavaGapS6-v0", (6, 8)),
        ("MiniGrid-LavaGapS7-v0", (7, 9)),
        ("MiniGrid-LavaCrossingS9N1-v0", (9, 9)),
        ("MiniGrid-LavaCrossingS9N2-v0", (9, 9)),
        ("MiniGrid-LavaCrossingS9N3-v0", (9, 9)),
    ],
}
203
+
204
+
205
async def create_minigrid_taskset(
    num_tasks_per_difficulty: Optional[Dict[str, int]] = None,
    seed: Optional[int] = None,
) -> TaskInstanceSet:
    """Generate MiniGrid task instances from the predefined ENVIRONMENTS table.

    Args:
        num_tasks_per_difficulty: Number of tasks to generate for each
            difficulty. Defaults to {"easy": 10, "medium": 10, "hard": 10}.
            Difficulty labels that are not keys of ENVIRONMENTS are skipped.
        seed: Random seed. When given, the module-level ``random`` generator
            is seeded, which fixes environment selection, per-task seeds and
            the split shuffle. Instance ids come from ``uuid4()`` and are
            therefore different on every call.

    Returns:
        TaskInstanceSet whose split info marks roughly 15% of the instances
        as validation and 15% as test (at least one each is requested); the
        remaining instances are in neither set.
    """
    if num_tasks_per_difficulty is None:
        num_tasks_per_difficulty = {"easy": 10, "medium": 10, "hard": 10}

    if seed is not None:
        random.seed(seed)

    instances = []

    for difficulty, num_tasks in num_tasks_per_difficulty.items():
        if difficulty not in ENVIRONMENTS:
            continue

        envs = ENVIRONMENTS[difficulty]

        for _ in range(num_tasks):
            # Select random environment
            env_name, grid_size = random.choice(envs)

            # Determine features based on environment name. "Unlock"
            # environments count as having a door, matching
            # MiniGridTaskInstanceMetadata.from_seed; without it those tasks
            # were told nothing about the key or the door.
            # NOTE(review): "Room" also matches FourRooms, which the text
            # below describes as connected by openings - confirm has_door is
            # intended there.
            has_key = "DoorKey" in env_name or "Unlock" in env_name
            has_door = "Door" in env_name or "Room" in env_name or "Unlock" in env_name
            has_lava = "Lava" in env_name

            # Estimate number of objects
            num_objects = 0
            if has_key:
                num_objects += 1
            if has_door:
                num_objects += 1
            if "Pickup" in env_name:
                num_objects += 1

            # Create task-specific instructions with clear symbol explanations
            instructions = f"Navigate the {grid_size[0]}x{grid_size[1]} grid to reach the goal marked with 'G'."

            # Add specific instructions based on environment features
            if has_lava:
                instructions += (
                    " Avoid stepping on lava tiles marked with 'L' as they will end your mission."
                )
            if has_key and has_door:
                instructions += " You must first pick up the key marked with 'K', then use it to unlock doors marked with 'D'."
            elif has_door:
                instructions += " Navigate through doors marked with 'D' to reach different rooms."

            # Add general navigation help
            if env_name.startswith("MiniGrid-Empty"):
                instructions += " The grid contains walls (#) that block movement and empty spaces (.) you can move through."
            elif "FourRooms" in env_name:
                instructions += " The grid is divided into four rooms connected by openings. Find the path between rooms to reach the goal."
            elif "MultiRoom" in env_name:
                instructions += (
                    " Navigate through multiple connected rooms to find and reach the goal."
                )

            # Always remind about the goal and exploration
            instructions += " Note: You have limited vision and may need to explore the maze to find the goal. Look for the green goal square marked with 'G' - it may not be visible initially, so explore systematically to discover it."

            # Create rubric
            rubric = {
                "goal": f"Successfully complete the {env_name} environment by reaching the goal.",
                "success_criteria": [
                    "Reach the goal tile marked with 'G'",
                    "Avoid illegal moves or getting stuck",
                ],
            }

            if has_lava:
                rubric["success_criteria"].append("Do not step on lava tiles")
            if has_key:
                rubric["success_criteria"].append("Pick up the key before attempting to open doors")

            # Generate a seed for this task (drawn after the environment
            # choice, so the draw order stays choice -> seed per task)
            task_seed = random.randint(0, 1000000)

            # Create task instance
            impetus = Impetus(instructions=instructions)
            intent = Intent(
                rubric=rubric,
                gold_trajectories=None,
                gold_state_diff={},
            )

            metadata = MiniGridTaskInstanceMetadata(
                env_name=env_name,
                grid_size=grid_size,
                difficulty=difficulty,
                has_key=has_key,
                has_door=has_door,
                has_lava=has_lava,
                num_objects=num_objects,
                seed=task_seed,
            )

            instance = MiniGridTaskInstance(
                id=uuid4(),
                impetus=impetus,
                intent=intent,
                metadata=metadata,
                is_reproducible=True,
                initial_engine_snapshot=None,
            )

            instances.append(instance)

    # Create splits (70% train, 15% val, 15% test); max() requests at least
    # one validation and one test instance even for very small sets.
    n_total = len(instances)
    n_val = max(1, int(n_total * 0.15))
    n_test = max(1, int(n_total * 0.15))

    # Shuffle and split
    random.shuffle(instances)

    val_ids = {inst.id for inst in instances[:n_val]}
    test_ids = {inst.id for inst in instances[n_val : n_val + n_test]}

    split_info = SplitInfo(
        val_instance_ids=val_ids,
        test_instance_ids=test_ids,
        _is_split_defined=True,
    )

    return TaskInstanceSet(
        name="MiniGrid TaskSet",
        description="Diverse MiniGrid navigation tasks across multiple environments and difficulties",
        instances=instances,
        split_info=split_info,
    )
347
+
348
+
349
def create_minigrid_task_from_env_seed(seed: int) -> MiniGridTaskInstance:
    """
    Create a MiniGrid task instance from a seed using environment mapping.

    This synchronous factory was originally defined as
    ``create_minigrid_task_from_seed``, a name that an async function further
    down this module re-binds; it now has its own name so it stays callable
    after import.

    Args:
        seed: Integer seed for environment selection

    Returns:
        MiniGrid task instance whose metadata comes from
        ``MiniGridTaskInstanceMetadata.from_seed(seed)``
    """
    metadata = MiniGridTaskInstanceMetadata.from_seed(seed)
    env_name = metadata.env_name

    # Create task-specific instructions based on environment and features
    instructions = f"Navigate the {metadata.grid_size[0]}x{metadata.grid_size[1]} grid to reach the goal marked with 'G'."

    # Add specific instructions based on environment features
    if metadata.has_lava:
        instructions += (
            " Avoid stepping on lava tiles marked with 'L' as they will end your mission."
        )
    if metadata.has_key and metadata.has_door:
        instructions += " You must first pick up the key marked with 'K', then use it to unlock doors marked with 'D'."
    elif metadata.has_door:
        instructions += " Navigate through doors marked with 'D' to reach different rooms."

    # Add general navigation help (only the first matching family applies)
    if "Empty" in env_name:
        instructions += " The grid contains walls (#) that block movement and empty spaces (.) you can move through."
    elif "FourRooms" in env_name:
        instructions += " The grid is divided into four rooms connected by openings. Find the path between rooms to reach the goal."
    elif "MultiRoom" in env_name:
        instructions += " Navigate through multiple connected rooms to find and reach the goal."
    elif "Memory" in env_name:
        instructions += " Remember the initial object you see, then navigate to find the matching object at the end."
    elif "Fetch" in env_name:
        instructions += " Pick up the correct object as specified in your mission."
    elif "PutNear" in env_name:
        instructions += " Pick up objects and place them near other objects as instructed."

    instructions += " You have limited vision and may need to explore to find the goal. Look for the green goal square marked with 'G'."

    # Create rubric based on environment
    rubric = {
        "goal": f"Successfully complete the {env_name} environment by reaching the goal.",
        "success_criteria": [
            "Reach the goal tile marked with 'G'",
            "Avoid illegal moves or getting stuck",
        ],
    }

    if metadata.has_lava:
        rubric["success_criteria"].append("Do not step on lava tiles")
    if metadata.has_key:
        rubric["success_criteria"].append("Pick up the key before attempting to open doors")

    return MiniGridTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions=instructions),
        intent=Intent(
            rubric=rubric,
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )


# Original name of the synchronous factory above. It is kept bound here for
# import-time compatibility, but the async, puzzle-based function of the same
# name defined later in this module replaces it once the module has loaded.
create_minigrid_task_from_seed = create_minigrid_task_from_env_seed

# Default task instance for quick testing (seed 0; the original note says this
# maps to MiniGrid-Empty-5x5-v0)
DEFAULT_MINIGRID_TASK = create_minigrid_task_from_env_seed(0)
420
+
421
+
422
async def create_easy_minigrid_taskset(num_instances: int = 50) -> TaskInstanceSet:
    """Build a taskset restricted to the "easy" difficulty.

    Args:
        num_instances: Upper bound on the number of easy puzzles to include.

    Returns:
        The filtered taskset produced by ``create_filtered_minigrid_taskset``.
    """
    easy_only = ["easy"]
    easy_taskset = await create_filtered_minigrid_taskset(
        difficulties=easy_only,
        num_instances_per_difficulty=num_instances,
    )
    return easy_taskset
427
+
428
+
429
async def create_filtered_minigrid_taskset(
    difficulties: List[str], num_instances_per_difficulty: int = 10
) -> TaskInstanceSet:
    """
    Build a taskset from stored puzzles of the requested difficulties only.

    Progress and "no puzzles" warnings are printed to stdout.

    Args:
        difficulties: Difficulty levels to include, processed in order
        num_instances_per_difficulty: Length of the prefix taken from each
            difficulty's puzzle list

    Returns:
        TaskInstanceSet containing the selected puzzles. Instances at
        positions 0, 3, 6, ... are marked as validation and those at
        1, 4, 7, ... as test.
    """
    # Obtain the loader and make sure its puzzles are loaded
    loader = get_puzzle_loader()
    loader.load_puzzles()

    instances = []
    for level in difficulties:
        candidates = loader.get_puzzles_by_difficulty(level)
        if not candidates:
            print(f"Warning: No puzzles found for difficulty {level}")
            continue

        # Keep at most the requested number of puzzles
        selected = candidates[:num_instances_per_difficulty]
        print(f"Using {len(selected)} puzzles for {level} difficulty")

        instances.extend(
            [await create_minigrid_task_from_puzzle(puzzle) for puzzle in selected]
        )

    # Simple positional split: index % 3 == 0 -> validation, == 1 -> test
    val_ids = {inst.id for pos, inst in enumerate(instances) if pos % 3 == 0}
    test_ids = {inst.id for pos, inst in enumerate(instances) if pos % 3 == 1}

    return TaskInstanceSet(
        name="MiniGrid Filtered TaskSet",
        description=f"Filtered MiniGrid tasks for difficulties: {', '.join(difficulties)}",
        instances=instances,
        split_info=SplitInfo(
            val_instance_ids=val_ids,
            test_instance_ids=test_ids,
            _is_split_defined=True,
        ),
    )
478
+
479
+
480
async def create_minigrid_task_from_seed(difficulty: str, seed: int) -> MiniGridTaskInstance:
    """
    Build one MiniGrid task instance for a difficulty/seed pair.

    Puzzle selection is delegated to ``get_puzzle_by_seed``.

    Args:
        difficulty: The difficulty level to draw the puzzle from
        seed: Seed passed on for deterministic puzzle selection

    Returns:
        Single MiniGridTaskInstance built from the selected puzzle

    Raises:
        ValueError: If no puzzle is returned for ``difficulty``
    """
    selected = get_puzzle_by_seed(difficulty, seed)
    if selected:
        return await create_minigrid_task_from_puzzle(selected)

    raise ValueError(f"No puzzles available for difficulty '{difficulty}'")
497
+
498
+
499
async def create_minigrid_task_from_puzzle(puzzle: MiniGridPuzzle) -> MiniGridTaskInstance:
    """
    Turn a stored puzzle description into a MiniGrid task instance.

    The instructions start from the puzzle's mission description, gain
    feature-specific hints (lava, key/door, environment family) and end with
    the list of available actions.

    Args:
        puzzle: MiniGridPuzzle instance

    Returns:
        MiniGridTaskInstance with a fresh ``uuid4()`` id
    """
    instance_id = uuid4()
    env_name = puzzle.environment_name

    # Pieces of the instruction text, joined in order at the end
    parts = [puzzle.mission_description]

    # Feature-specific hints
    if puzzle.has_lava:
        parts.append(
            " Avoid stepping on lava tiles marked with 'L' as they will end your mission."
        )
    if puzzle.has_door:
        if puzzle.has_key:
            parts.append(
                " You must first pick up the key marked with 'K', then use it to unlock doors marked with 'D'."
            )
        else:
            parts.append(" Navigate through doors marked with 'D' to reach different rooms.")

    # General navigation help: the first keyword found in the environment
    # name decides which (single) sentence is added
    family_help = (
        (
            "Empty",
            " The grid contains walls (#) that block movement and empty spaces (.) you can move through.",
        ),
        (
            "FourRooms",
            " The grid is divided into four rooms connected by openings. Find the path between rooms to reach the goal.",
        ),
        (
            "MultiRoom",
            " Navigate through multiple connected rooms to find and reach the goal.",
        ),
        (
            "Memory",
            " Remember the initial object you see, then navigate to find the matching object at the end.",
        ),
        (
            "Fetch",
            " Pick up the correct object as specified in your mission.",
        ),
        (
            "PutNear",
            " Pick up objects and place them near other objects as instructed.",
        ),
    )
    for keyword, sentence in family_help:
        if keyword in env_name:
            parts.append(sentence)
            break

    # Movement commands
    parts.append(
        "\n\nAvailable actions:\n"
        "- 'left': Turn left\n"
        "- 'right': Turn right\n"
        "- 'forward': Move forward\n"
        "- 'pickup': Pick up an object\n"
        "- 'drop': Drop a carried object\n"
        "- 'toggle': Open/close doors or interact with objects\n"
        "- 'done': Complete the mission when you reach the goal"
    )

    return MiniGridTaskInstance(
        id=instance_id,
        impetus=Impetus(instructions="".join(parts)),
        intent=Intent(
            rubric={"goal": "Navigate to the goal position efficiently."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        # Metadata is copied field by field from the puzzle
        metadata=MiniGridTaskInstanceMetadata(
            env_name=env_name,
            grid_size=puzzle.grid_size,
            difficulty=puzzle.difficulty,
            has_key=puzzle.has_key,
            has_door=puzzle.has_door,
            has_lava=puzzle.has_lava,
            num_objects=puzzle.num_objects,
            optimal_path_length=puzzle.estimated_steps,
            seed=puzzle.seed,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
580
+
581
+
582
# Module-level export for compatibility: `taskset` is an alias for the async
# factory itself (not a built TaskInstanceSet), so callers must call and
# await it.
taskset = create_minigrid_taskset
@@ -0,0 +1,7 @@
1
+ """NetHack environment implementation for synth-env framework."""
2
+
3
+ __all__ = ["NetHackEngine", "NetHackEnvironment", "create_nethack_taskset"]
4
+
5
+ from .engine import NetHackEngine
6
+ from .environment import NetHackEnvironment
7
+ from .taskset import create_nethack_taskset