synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (107) hide show
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/task/apps/__init__.py +54 -13
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +107 -12
  104. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1271 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+ import json
6
+
7
+ from fastapi import APIRouter, HTTPException
8
+ from pydantic import BaseModel
9
+
10
+ from uuid import uuid4
11
+
12
+ # Import the actual classes from synth-ai
13
+ from synth_ai.environments.examples.crafter_classic.environment import (
14
+ CrafterClassicEnvironment,
15
+ )
16
+ from synth_ai.environments.examples.crafter_classic.taskset import (
17
+ CrafterTaskInstance,
18
+ CrafterTaskInstanceMetadata,
19
+ )
20
+ from synth_ai.environments.tasks.core import Impetus, Intent
21
+
22
+ from .envs.crafter.environment import CrafterEnvironmentWrapper
23
+ from .registry import registry
24
+ from .storage.volume import storage
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ router = APIRouter()
29
+
30
+
31
async def validate_environment_observation(observation: Any, context: str) -> None:
    """Validate the structure of an environment observation.

    Every observation must be a non-None ``dict``. Observations that carry
    the full set of Wordle keys additionally have the types of their
    ``text``, ``guesses`` and ``feedback`` fields checked; any other
    observation is accepted after the basic checks.

    Args:
        observation: The observation to validate.
        context: Context string used in log and error messages
            (e.g., "initialize", "step").

    Raises:
        ValueError: If the observation is None, is not a dict, or is a
            Wordle observation whose fields have the wrong type.
    """
    if observation is None:
        raise ValueError(f"Environment observation cannot be None in {context}")

    if not isinstance(observation, dict):
        raise ValueError(
            f"Environment observation must be dict in {context}, got {type(observation)}"
        )

    # An observation is treated as Wordle only when *all* of these keys are present.
    wordle_keys = {
        "text",
        "status",
        "remaining_guesses",
        "guesses",
        "feedback",
        "reward_last",
        "total_reward",
        "terminated",
    }
    if not wordle_keys.issubset(observation):
        logger.debug(
            "🔍 ENV_ROUTES: Observation doesn't appear to be Wordle in %s, skipping validation",
            context,
        )
        return

    logger.info(
        "🔍 ENV_ROUTES: Validating Wordle observation structure in %s", context
    )
    logger.info("🔍 ENV_ROUTES: Observation keys: %s", list(observation.keys()))

    # NOTE: the former "missing required keys" check was removed: it ran only
    # after the subset test above had already proven that no key is missing,
    # so it could never fire.
    expected_types = (
        ("text", str, "string"),
        ("guesses", list, "list"),
        ("feedback", list, "list"),
    )
    for key, expected_type, label in expected_types:
        value = observation[key]
        if not isinstance(value, expected_type):
            raise ValueError(
                f"Wordle observation '{key}' must be {label} in {context}, got {type(value)}"
            )

    logger.info(
        "✅ ENV_ROUTES: Wordle observation structure validated successfully in %s",
        context,
    )
99
+
100
+
101
class EnvCreateRequest(BaseModel):
    """Request body for ``POST /create``."""

    # Environment kind; compared case-insensitively against
    # "crafter", "wordle", "sokoban" and "math" by create_environment.
    env_name: str
    # Per-environment options (e.g. "difficulty", "word_length",
    # "max_guesses", "initial_state", "problem_id", "problem_text").
    config: Dict[str, Any] = {}
    # Optional seed forwarded to the environment wrapper.
    seed: Optional[int] = None
    # NOTE(review): not read by create_environment in the visible code.
    parent_env_id: Optional[str] = None
    # Run identifier stored alongside the environment in the registry.
    rl_run_id: str
107
+
108
+
109
class EnvCreateResponse(BaseModel):
    """Response returned by ``POST /create`` and ``POST /CrafterClassic/initialize``."""

    # Identifier returned by registry.register_env for the new environment.
    env_id: str
    # Initial observation with the "step_idx"/"info" bookkeeping fields removed.
    observation: Dict[str, Any]
    # The "info" entry of the wrapper's initialize() result, if any.
    info: Optional[Dict[str, Any]] = None
    # The "step_idx" entry of the wrapper's initialize() result.
    step_idx: int
114
+
115
+
116
class EnvResetRequest(BaseModel):
    """Request body for ``POST /reset``."""

    # Registry identifier of the environment to reset.
    env_id: str
    # Optional new seed to apply on reset.
    seed: Optional[int] = None
119
+
120
+
121
class EnvResetResponse(BaseModel):
    """Response model declared for ``POST /reset``."""

    # Observation after the reset.
    observation: Dict[str, Any]
    # Optional auxiliary information accompanying the observation.
    info: Optional[Dict[str, Any]] = None
    # Step counter after the reset.
    step_idx: int
125
+
126
+
127
class EnvStepRequest(BaseModel):
    """Step request; built by ``compat_step`` and passed to ``step_environment``."""

    # Registry identifier of the environment to step.
    env_id: str
    # Tool calls to apply; the CrafterClassic compatibility route builds
    # entries shaped like {"tool": "interact", "args": {"action": ...}}.
    tool_calls: List[Dict[str, Any]]
130
+
131
+
132
class EnvStepResponse(BaseModel):
    """Response model declared for ``POST /CrafterClassic/step``."""

    # Observation after the step.
    observation: Dict[str, Any]
    # Whether the episode has finished.
    done: bool
    # Optional auxiliary information accompanying the observation.
    info: Optional[Dict[str, Any]] = None
    # Optional reward for the step.
    reward: Optional[float] = None
    # Optional truncation flag.
    # NOTE(review): exact semantics are set by the step handler, which is outside this view.
    truncated: Optional[bool] = None
    # Step counter after the step.
    step_idx: int
139
+
140
+
141
class EnvSnapshotRequest(BaseModel):
    """Snapshot request model.

    NOTE(review): the route that consumes this model is outside this view;
    presumably it snapshots the environment identified by ``env_id``.
    """

    # Registry identifier of the environment.
    env_id: str
143
+
144
+
145
class EnvSnapshotResponse(BaseModel):
    """Snapshot response model.

    NOTE(review): the route that produces this model is outside this view;
    the field meanings below are inferred from their names — confirm.
    """

    # Identifier of the stored snapshot.
    snapshot_id: str
    # Presumably the storage location of the snapshot — TODO confirm.
    path: str
    # Run identifier associated with the snapshot.
    rl_run_id: str
    # Presumably the snapshot size (units not visible here) — TODO confirm.
    size: int
150
+
151
+
152
class EnvRestoreRequest(BaseModel):
    """Restore request model.

    NOTE(review): the route that consumes this model is outside this view.
    """

    # Identifier of the snapshot to restore from.
    snapshot_id: str
154
+
155
+
156
class EnvRestoreResponse(BaseModel):
    """Restore response model; same field layout as ``EnvCreateResponse``.

    NOTE(review): the route that produces this model is outside this view.
    """

    # Identifier of the environment.
    env_id: str
    # Observation payload.
    observation: Dict[str, Any]
    # Optional auxiliary information accompanying the observation.
    info: Optional[Dict[str, Any]] = None
    # Step counter.
    step_idx: int
161
+
162
+
163
class EnvTerminateRequest(BaseModel):
    """Terminate request; built by ``compat_terminate`` and passed to ``terminate_environment``."""

    # Registry identifier of the environment to terminate.
    env_id: str
165
+
166
+
167
class EnvTerminateResponse(BaseModel):
    """Response model declared for ``POST /CrafterClassic/terminate``."""

    # Success flag.
    ok: bool
169
+
170
+
171
def _extract_initial_observation(
    result: Any, *, strip_nested: bool = False
) -> Dict[str, Any]:
    """Return a copy of the observation contained in an ``initialize()`` result.

    Handles both the old nested layout (``{"observation": {...}, ...}``) and
    the new flat layout, where the bookkeeping keys ``step_idx`` and ``info``
    sit next to the observation fields and must be removed.

    Args:
        result: Value returned by a wrapper's ``initialize()``.
        strip_nested: Also remove the bookkeeping keys from a nested
            observation (the math branch has always done this).
    """
    if isinstance(result, dict) and "observation" in result:
        observation = result["observation"].copy()
        if not strip_nested:
            return observation
    else:
        observation = result.copy()
    for key in ("step_idx", "info"):
        observation.pop(key, None)
    return observation


def _register_created_env(
    wrapper: Any,
    request: EnvCreateRequest,
    result: Dict[str, Any],
    observation: Dict[str, Any],
) -> EnvCreateResponse:
    """Register an initialized wrapper in the registry and build the response."""
    info = result.get("info")
    env_id = registry.register_env(
        env=wrapper,
        seed=request.seed,
        rl_run_id=request.rl_run_id,
        last_observation=observation,
        last_info=info,
    )

    # Keep the registry handle's step index in sync with the wrapper.
    handle = registry.get_env(env_id)
    if handle:
        handle.step_idx = result["step_idx"]

    return EnvCreateResponse(
        env_id=env_id,
        observation=observation,
        info=info,
        step_idx=result["step_idx"],
    )


def _log_crafter_init_signature(base_env: Any, seed_value: int) -> None:
    """Log a short hash of the starting public state (best-effort sanity check)."""
    import hashlib

    try:
        pub_state = base_env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
        position = list(pub_state.player_position)
        inventory = {k: v for k, v in pub_state.inventory.items() if v}
        sig_src = {
            "player_position": position,
            "player_direction": pub_state.player_direction,
            "semantic_map": pub_state.semantic_map,
            "inventory": inventory,
        }
        sig_str = json.dumps(sig_src, sort_keys=True)
        # md5 is used only as a cheap fingerprint, not for security.
        sig = hashlib.md5(sig_str.encode("utf-8")).hexdigest()[:12]
        logger.info(
            "Crafter init signature: seed=%s sig=%s pos=%s inv=%s",
            str(seed_value),
            sig,
            position,
            inventory,
        )
    except Exception:
        # Diagnostics only: never fail environment creation because of it,
        # but leave a trace instead of swallowing the error silently.
        logger.debug("Crafter init signature logging failed", exc_info=True)


def _build_crafter_env(request: EnvCreateRequest):
    """Build the Crafter wrapper; returns ``(wrapper, base_env, seed_value)``."""
    difficulty = (request.config or {}).get("difficulty", "normal")
    # The task metadata needs a concrete seed; the wrapper keeps the raw value.
    seed_value = request.seed if request.seed is not None else 0
    instance = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Survive and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Unlock achievements"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty=difficulty,
            seed=seed_value,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    base_env = CrafterClassicEnvironment(task_instance=instance)
    wrapper = CrafterEnvironmentWrapper(env=base_env, seed=request.seed)
    return wrapper, base_env, seed_value


def _build_wordle_env(request: EnvCreateRequest) -> Any:
    """Build the Wordle wrapper; imports are deferred to avoid a hard dependency."""
    try:
        from synth_ai.environments.examples.wordle.taskset import (
            WordleTaskInstance,
            WordleTaskInstanceMetadata,
        )
        from synth_ai.environments.examples.wordle.environment import (
            WordleEnvironment,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle modules unavailable: {e}"
        ) from e

    try:
        from .envs.wordle.environment import (
            WordleEnvironmentWrapper as _WordleWrapper,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle wrapper unavailable: {e}"
        ) from e

    cfg = request.config or {}
    word_length = int(cfg.get("word_length", 5))
    max_guesses = int(cfg.get("max_guesses", 6))

    instance = WordleTaskInstance(
        id=uuid4(),
        impetus=Impetus(
            # Use the configured length rather than a hard-coded "5-letter".
            instructions=f"Play Wordle. Submit one {word_length}-letter word per turn."
        ),
        intent=Intent(
            rubric="guess the word", gold_trajectories=None, gold_state_diff={}
        ),
        metadata=WordleTaskInstanceMetadata(
            word_length=word_length,
            max_guesses=max_guesses,
            target_word=None,  # Let seed determine the word
            enforce_wordlist=True,
            seed=request.seed,
            consume_invalid_attempts=True,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    base_env = WordleEnvironment(task_instance=instance)

    # Try to preserve the exact puzzle snapshot for reproducibility
    init_snap = getattr(instance, "initial_engine_snapshot", None)

    return _WordleWrapper(
        env=base_env,
        seed=request.seed,
        word_length=word_length,
        max_guesses=max_guesses,
        initial_engine_snapshot=init_snap,
    )


def _build_sokoban_env(request: EnvCreateRequest) -> Any:
    """Build the Sokoban wrapper; imports are deferred to avoid a hard dependency."""
    try:
        from synth_ai.environments.examples.sokoban.taskset import (
            SokobanTaskInstance,
            SokobanTaskInstanceMetadata,
        )
        from synth_ai.environments.examples.sokoban.environment import (
            SokobanEnvironment,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban modules unavailable: {e}"
        ) from e

    try:
        from .envs.sokoban.environment import (
            SokobanEnvironmentWrapper as _SokobanWrapper,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
        ) from e

    cfg = request.config or {}
    instance = SokobanTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Push boxes to targets."),
        intent=Intent(
            rubric={"goal": "Solve the Sokoban puzzle"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=SokobanTaskInstanceMetadata(
            difficulty=cfg.get("difficulty", "easy"),
        ),
        is_reproducible=True,
        initial_engine_snapshot=cfg.get("initial_state"),  # Optional engine snapshot
    )
    base_env = SokobanEnvironment(task_instance=instance)
    return _SokobanWrapper(env=base_env, seed=request.seed, config=cfg)


def _build_math_env(request: EnvCreateRequest) -> Any:
    """Build the single-step math wrapper (GSM8K-style)."""
    cfg = request.config or {}
    try:
        from .envs.math.environment import (
            MathEnvironmentWrapper as _MathWrapper,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Math wrapper unavailable: {e}"
        ) from e

    return _MathWrapper(
        seed=request.seed,
        problem_id=cfg.get("problem_id"),
        problem_text=cfg.get("problem_text"),
    )


@router.post("/create", response_model=EnvCreateResponse)
async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
    """Create, initialize and register a new environment instance.

    Supported ``env_name`` values (case-insensitive): ``crafter``,
    ``wordle``, ``sokoban`` and ``math``.

    Args:
        request: Environment name, per-environment config, optional seed and
            the run identifier to register the environment under.

    Returns:
        The registry id of the new environment with its initial observation,
        info and step index.

    Raises:
        HTTPException: 422 for an unknown environment name; 500 when optional
            environment modules cannot be imported or creation fails for any
            other reason.
    """
    try:
        env_name_lower = request.env_name.lower()
        if env_name_lower == "crafter":
            wrapper, base_env, seed_value = _build_crafter_env(request)
            result = await wrapper.initialize()
            _log_crafter_init_signature(base_env, seed_value)
            observation = _extract_initial_observation(result)
        elif env_name_lower == "wordle":
            wrapper = _build_wordle_env(request)
            result = await wrapper.initialize()
            observation = _extract_initial_observation(result)
            await validate_environment_observation(observation, "initialize")
        elif env_name_lower == "sokoban":
            wrapper = _build_sokoban_env(request)
            result = await wrapper.initialize()
            observation = _extract_initial_observation(result)
        elif env_name_lower == "math":
            wrapper = _build_math_env(request)
            result = await wrapper.initialize()
            observation = _extract_initial_observation(result, strip_nested=True)
        else:
            raise HTTPException(
                status_code=422,
                detail=f"Unknown environment name: {request.env_name}",
            )

        return _register_created_env(wrapper, request, result, observation)

    except HTTPException:
        # Preserve deliberate status codes (e.g. the 422 above) instead of
        # letting the generic handler below rewrite them into a 500.
        raise
    except Exception as e:
        logger.exception("Failed to create environment: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
497
+
498
+
499
+ # --- Compatibility routes for existing eval scripts that expect CrafterClassic paths ---
500
@router.post("/CrafterClassic/initialize", response_model=EnvCreateResponse)
async def compat_initialize(payload: dict) -> EnvCreateResponse:
    """Compatibility shim: create a Crafter environment from a legacy payload.

    The difficulty is taken from the first of these that is present and
    truthy: ``world_config`` as a string, ``world_config["difficulty"]``,
    ``config["difficulty"]``; otherwise it defaults to ``"normal"``. The
    environment is registered under the fixed run id ``"eval"``.
    """
    world_config = payload.get("world_config")
    legacy_config = payload.get("config")

    if isinstance(world_config, str) and world_config:
        level = world_config
    elif isinstance(world_config, dict) and world_config.get("difficulty"):
        level = str(world_config.get("difficulty"))
    elif isinstance(legacy_config, dict) and legacy_config.get("difficulty"):
        level = str(legacy_config.get("difficulty"))
    else:
        level = "normal"

    create_request = EnvCreateRequest(
        env_name="crafter",
        config={"difficulty": level},
        seed=payload.get("seed"),
        rl_run_id="eval",
    )
    return await create_environment(create_request)
514
+
515
+
516
@router.post("/CrafterClassic/step", response_model=EnvStepResponse)
async def compat_step(payload: dict) -> EnvStepResponse:
    """Compatibility shim: translate a legacy step payload and step the env.

    Accepted ``payload["action"]`` shapes:
      * ``{"tool_calls": [{"tool": "interact", "args": {"action": <id>}}]}``
      * ``{"actions": [<id>, ...]}`` — each entry is expanded into an
        ``interact`` tool call appended after any explicit ``tool_calls``.

    Raises:
        HTTPException: 422 when ``env_id`` is missing from the payload.
    """
    env_id = payload.get("env_id")
    if env_id is None:
        # Fail with a clear client error instead of an opaque validation
        # error raised while building EnvStepRequest.
        raise HTTPException(status_code=422, detail="Missing required field: env_id")

    action = payload.get("action") or {}
    if not isinstance(action, dict):
        action = {}

    # Copy so the fallback expansion below never mutates the caller's payload.
    raw_tool_calls = action.get("tool_calls")
    tool_calls = list(raw_tool_calls) if isinstance(raw_tool_calls, list) else []

    # Fallback: support {action: {actions: [..]}} by expanding into tool_calls
    actions_list = action.get("actions")
    if isinstance(actions_list, list):
        tool_calls.extend(
            {"tool": "interact", "args": {"action": a}} for a in actions_list
        )

    req = EnvStepRequest(env_id=env_id, tool_calls=tool_calls)
    return await step_environment(req)
534
+
535
+
536
@router.post("/CrafterClassic/terminate", response_model=EnvTerminateResponse)
async def compat_terminate(payload: dict) -> EnvTerminateResponse:
    """Compatibility shim: terminate the environment named by ``payload["env_id"]``.

    Raises:
        HTTPException: 422 when ``env_id`` is missing from the payload.
    """
    env_id = payload.get("env_id")
    if env_id is None:
        # Fail with a clear client error instead of an opaque validation
        # error raised while building EnvTerminateRequest.
        raise HTTPException(status_code=422, detail="Missing required field: env_id")
    req = EnvTerminateRequest(env_id=env_id)
    return await terminate_environment(req)
541
+
542
+
543
+ @router.post("/reset", response_model=EnvResetResponse)
544
+ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
545
+ """Reset an environment to its initial state."""
546
+ handle = registry.get_env(request.env_id)
547
+ if not handle:
548
+ raise HTTPException(
549
+ status_code=404, detail=f"Environment {request.env_id} not found"
550
+ )
551
+
552
+ try:
553
+ # Determine wrapper type and rebuild base env if a new seed is provided
554
+ wrapper = handle.env
555
+ if isinstance(wrapper, CrafterEnvironmentWrapper):
556
+ if request.seed is not None:
557
+ try:
558
+ difficulty = "normal"
559
+ seed_value = int(request.seed)
560
+ metadata = CrafterTaskInstanceMetadata(
561
+ difficulty=difficulty,
562
+ seed=seed_value,
563
+ num_trees_radius=0,
564
+ num_cows_radius=0,
565
+ num_hostiles_radius=0,
566
+ )
567
+ instance = CrafterTaskInstance(
568
+ id=uuid4(),
569
+ impetus=Impetus(instructions="Reset"),
570
+ intent=Intent(
571
+ rubric={"goal": "Reset"},
572
+ gold_trajectories=None,
573
+ gold_state_diff={},
574
+ ),
575
+ metadata=metadata,
576
+ is_reproducible=True,
577
+ initial_engine_snapshot=None,
578
+ )
579
+ new_base_env = CrafterClassicEnvironment(task_instance=instance)
580
+ wrapper.env = new_base_env
581
+ wrapper.seed = seed_value
582
+ handle.seed = seed_value
583
+ except Exception:
584
+ wrapper.seed = request.seed
585
+ handle.seed = request.seed
586
+
587
+ elif True:
588
+ # Try to dynamically import Wordle wrapper and check instance safely
589
+ try:
590
+ from .envs.wordle.environment import (
591
+ WordleEnvironmentWrapper as _WordleWrapper,
592
+ )
593
+ except Exception:
594
+ _WordleWrapper = None # type: ignore
595
+
596
+ if _WordleWrapper is not None and isinstance(wrapper, _WordleWrapper):
597
+ # Rebuild Wordle env with the same configuration; if we have a preserved
598
+ # initial_engine_snapshot, prefer constructing the instance directly.
599
+ try:
600
+ from synth_ai.environments.examples.wordle.taskset import (
601
+ create_wordle_taskset,
602
+ WordleTaskInstance,
603
+ WordleTaskInstanceMetadata,
604
+ )
605
+ from synth_ai.environments.examples.wordle.environment import (
606
+ WordleEnvironment,
607
+ )
608
+ except Exception as e:
609
+ raise HTTPException(
610
+ status_code=500, detail=f"Wordle modules unavailable: {e}"
611
+ )
612
+
613
+ init_snap = getattr(wrapper, "initial_engine_snapshot", None)
614
+ if init_snap is not None:
615
+ metadata = WordleTaskInstanceMetadata(
616
+ word_length=int(wrapper.word_length),
617
+ max_guesses=int(wrapper.max_guesses),
618
+ )
619
+ instance = WordleTaskInstance(
620
+ id=uuid4(),
621
+ impetus=Impetus(instructions="Reset"),
622
+ intent=Intent(
623
+ rubric={"goal": "Reset"},
624
+ gold_trajectories=None,
625
+ gold_state_diff={},
626
+ ),
627
+ metadata=metadata,
628
+ is_reproducible=True,
629
+ initial_engine_snapshot=init_snap,
630
+ )
631
+ new_base_env = WordleEnvironment(task_instance=instance)
632
+ else:
633
+ ts = await create_wordle_taskset(
634
+ sample_size=1,
635
+ word_length=int(wrapper.word_length),
636
+ max_guesses=int(wrapper.max_guesses),
637
+ )
638
+ instance = ts.instances[0]
639
+ new_base_env = WordleEnvironment(task_instance=instance)
640
+ wrapper.env = new_base_env
641
+ if request.seed is not None:
642
+ wrapper.seed = int(request.seed)
643
+ handle.seed = int(request.seed)
644
+ else:
645
+ pass
646
+ # Rebuild Wordle env with the same configuration; if we have a preserved
647
+ # initial_engine_snapshot, prefer constructing the instance directly.
648
+ try:
649
+ from synth_ai.environments.examples.wordle.taskset import (
650
+ create_wordle_taskset,
651
+ WordleTaskInstance,
652
+ WordleTaskInstanceMetadata,
653
+ )
654
+ from synth_ai.environments.examples.wordle.environment import (
655
+ WordleEnvironment,
656
+ )
657
+ except Exception as e:
658
+ raise HTTPException(
659
+ status_code=500, detail=f"Wordle modules unavailable: {e}"
660
+ )
661
+
662
+ init_snap = getattr(wrapper, "initial_engine_snapshot", None)
663
+ if init_snap is not None:
664
+ metadata = WordleTaskInstanceMetadata(
665
+ word_length=int(wrapper.word_length),
666
+ max_guesses=int(wrapper.max_guesses),
667
+ )
668
+ instance = WordleTaskInstance(
669
+ id=uuid4(),
670
+ impetus=Impetus(instructions="Reset"),
671
+ intent=Intent(
672
+ rubric={"goal": "Reset"},
673
+ gold_trajectories=None,
674
+ gold_state_diff={},
675
+ ),
676
+ metadata=metadata,
677
+ is_reproducible=True,
678
+ initial_engine_snapshot=init_snap,
679
+ )
680
+ new_base_env = WordleEnvironment(task_instance=instance)
681
+ else:
682
+ ts = await create_wordle_taskset(
683
+ sample_size=1,
684
+ word_length=int(wrapper.word_length),
685
+ max_guesses=int(wrapper.max_guesses),
686
+ )
687
+ instance = ts.instances[0]
688
+ new_base_env = WordleEnvironment(task_instance=instance)
689
+ wrapper.env = new_base_env
690
+ if request.seed is not None:
691
+ wrapper.seed = int(request.seed)
692
+ handle.seed = int(request.seed)
693
+
694
+ elif True:
695
+ # Try to dynamically import Sokoban wrapper and check instance safely
696
+ try:
697
+ from .envs.sokoban.environment import (
698
+ SokobanEnvironmentWrapper as _SokobanWrapper,
699
+ )
700
+ except Exception:
701
+ _SokobanWrapper = None # type: ignore
702
+
703
+ if _SokobanWrapper is not None and isinstance(wrapper, _SokobanWrapper):
704
+ # Rebuild Sokoban env using stored config snapshot
705
+ try:
706
+ from synth_ai.environments.examples.sokoban.taskset import (
707
+ SokobanTaskInstance,
708
+ SokobanTaskInstanceMetadata,
709
+ )
710
+ from synth_ai.environments.examples.sokoban.environment import (
711
+ SokobanEnvironment,
712
+ )
713
+ except Exception as e:
714
+ raise HTTPException(
715
+ status_code=500, detail=f"Sokoban modules unavailable: {e}"
716
+ )
717
+
718
+ cfg = dict(wrapper.config or {})
719
+ metadata = SokobanTaskInstanceMetadata(
720
+ difficulty=cfg.get("difficulty", "easy"),
721
+ )
722
+ instance = SokobanTaskInstance(
723
+ id=uuid4(),
724
+ impetus=Impetus(instructions="Reset"),
725
+ intent=Intent(
726
+ rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}
727
+ ),
728
+ metadata=metadata,
729
+ is_reproducible=True,
730
+ initial_engine_snapshot=cfg.get("initial_state"),
731
+ )
732
+ new_base_env = SokobanEnvironment(task_instance=instance)
733
+ wrapper.env = new_base_env
734
+ if request.seed is not None:
735
+ wrapper.seed = int(request.seed)
736
+ handle.seed = int(request.seed)
737
+ else:
738
+ pass
739
+ # Rebuild Sokoban env using stored config snapshot
740
+ try:
741
+ from synth_ai.environments.examples.sokoban.taskset import (
742
+ SokobanTaskInstance,
743
+ SokobanTaskInstanceMetadata,
744
+ )
745
+ from synth_ai.environments.examples.sokoban.environment import (
746
+ SokobanEnvironment,
747
+ )
748
+ except Exception as e:
749
+ raise HTTPException(
750
+ status_code=500, detail=f"Sokoban modules unavailable: {e}"
751
+ )
752
+
753
+ cfg = dict(wrapper.config or {})
754
+ metadata = SokobanTaskInstanceMetadata(
755
+ difficulty=cfg.get("difficulty", "easy"),
756
+ )
757
+ instance = SokobanTaskInstance(
758
+ id=uuid4(),
759
+ impetus=Impetus(instructions="Reset"),
760
+ intent=Intent(
761
+ rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}
762
+ ),
763
+ metadata=metadata,
764
+ is_reproducible=True,
765
+ initial_engine_snapshot=cfg.get("initial_state"),
766
+ )
767
+ new_base_env = SokobanEnvironment(task_instance=instance)
768
+ wrapper.env = new_base_env
769
+ if request.seed is not None:
770
+ wrapper.seed = int(request.seed)
771
+ handle.seed = int(request.seed)
772
+
773
+ # Reset the environment regardless of type
774
+ result = await wrapper.initialize()
775
+
776
+ # Log a world signature after reset for sanity
777
+ try:
778
+ base_env = handle.env.env # type: ignore[attr-defined]
779
+ pub_state = base_env.engine._get_public_state_from_env() # type: ignore[attr-defined]
780
+ import hashlib, json as _json
781
+
782
+ sig_src = {
783
+ "player_position": list(pub_state.player_position),
784
+ "player_direction": pub_state.player_direction,
785
+ "semantic_map": pub_state.semantic_map,
786
+ "inventory": {k: v for k, v in pub_state.inventory.items() if v},
787
+ }
788
+ sig_str = _json.dumps(sig_src, sort_keys=True)
789
+ sig = hashlib.md5(sig_str.encode("utf-8")).hexdigest()[:12]
790
+ logger.info(
791
+ "Crafter reset signature: seed=%s sig=%s pos=%s inv=%s",
792
+ str(handle.seed),
793
+ sig,
794
+ list(pub_state.player_position),
795
+ {k: v for k, v in pub_state.inventory.items() if v},
796
+ )
797
+ except Exception as _:
798
+ pass
799
+
800
+ # Update registry
801
+ handle.step_idx = result["step_idx"]
802
+ handle.last_observation = result["observation"]
803
+ handle.last_info = result.get("info")
804
+
805
+ return EnvResetResponse(
806
+ observation=result["observation"],
807
+ info=result.get("info"),
808
+ step_idx=result["step_idx"],
809
+ )
810
+
811
+ except Exception as e:
812
+ logger.error(f"Failed to reset environment {request.env_id}: {e}")
813
+ raise HTTPException(status_code=500, detail=str(e))
814
+
815
+
816
# Keys of a flat step result that are reported as separate response fields
# rather than as part of the observation.
_STEP_RESULT_META_KEYS = ("step_idx", "done", "info", "reward", "truncated")


def _wordle_wrapper_class() -> Any:
    """Return the Wordle wrapper class, or ``None`` when it cannot be imported."""
    try:
        from .envs.wordle.environment import WordleEnvironmentWrapper
    except Exception:
        return None
    return WordleEnvironmentWrapper


def _normalize_wordle_tool_calls(
    tool_calls: Any,
    expected_len: int,
    *,
    preserve_tool: bool = True,
) -> List[Dict[str, Any]]:
    """Rewrite raw tool calls into well-formed Wordle tool calls.

    A guess is accepted when, after stripping and lower-casing, it is purely
    alphabetic and exactly ``expected_len`` characters long.  Every other call
    (missing guess, non-string guess, wrong length, unparsable arguments) is
    replaced by an ``invalid_guess`` call carrying the original guess value.

    Args:
        tool_calls: Iterable of dict-like tool calls; ``None`` is treated as empty.
        expected_len: Required guess length.
        preserve_tool: When true, keep the caller-supplied tool name
            (``tool`` / ``tool_name`` / ``name``, defaulting to ``interact``);
            when false, always emit ``interact`` for valid guesses.

    Returns:
        One normalized ``{"tool": ..., "args": ...}`` dict per input call.
    """
    # Imported locally so the helper does not depend on a module-level import.
    import json

    normalized: List[Dict[str, Any]] = []
    for tc in tool_calls or []:
        tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
        args = tc.get("arguments") or tc.get("args") or {}
        if isinstance(args, str):
            # Arguments may arrive as a JSON-encoded string.
            try:
                args = json.loads(args)
            except (ValueError, RecursionError):
                args = {}

        guess = None
        if isinstance(args, dict):
            guess = args.get("guess") or args.get("word")

        cleaned = guess.strip().lower() if isinstance(guess, str) else ""
        if cleaned.isalpha() and len(cleaned) == expected_len:
            normalized.append(
                {
                    "tool": tool if preserve_tool else "interact",
                    "args": {"guess": cleaned},
                }
            )
        else:
            normalized.append(
                {"tool": "invalid_guess", "args": {"original_guess": guess}}
            )
    return normalized


def _strip_step_metadata(result: Any) -> Any:
    """Return a copy of a flat step result without the non-observation keys."""
    observation = result.copy()
    for key in _STEP_RESULT_META_KEYS:
        observation.pop(key, None)
    return observation


def _record_step_result(handle: Any, result: Any) -> "EnvStepResponse":
    """Store a step result on the registry handle and build the API response.

    Handles both result layouts: the nested one (observation under an
    ``"observation"`` key) and the flat one (observation fields mixed with
    the metadata keys).
    """
    handle.step_idx = result["step_idx"]

    if isinstance(result, dict) and "observation" in result:
        # Nested structure - take the inner observation.
        observation = result["observation"].copy()
    else:
        # Flat structure - drop the fields reported separately.
        observation = _strip_step_metadata(result)

    handle.last_observation = observation
    handle.last_info = result.get("info")

    return EnvStepResponse(
        observation=observation,
        done=result["done"],
        info=result.get("info"),
        reward=result.get("reward"),
        truncated=result.get("truncated"),
        step_idx=result["step_idx"],
    )


@router.post("/step", response_model=EnvStepResponse)
async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
    """Execute a step in the environment.

    For Wordle wrappers the tool calls are normalized first so that malformed
    guesses become ``invalid_guess`` calls instead of hard failures.  If the
    step (or anything after it) raises and the environment is a Wordle
    wrapper, the step is retried once with calls normalized to ``interact``.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            the step fails and the Wordle fallback does not apply or also fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(
            status_code=404, detail=f"Environment {request.env_id} not found"
        )

    try:
        wrapper = handle.env
        wordle_cls = _wordle_wrapper_class()

        if wordle_cls is not None and isinstance(wrapper, wordle_cls):
            expected_len = int(getattr(wrapper, "word_length", 5))
            # Keep the original tool name (interact or submit) for the
            # environment to handle.
            normalized = _normalize_wordle_tool_calls(
                request.tool_calls, expected_len, preserve_tool=True
            )
            result = await wrapper.step(normalized)
        else:
            result = await handle.env.step(request.tool_calls)

        # Validate observation structure for Wordle environments, detected by
        # the class name of the wrapped base environment.
        inner_env = getattr(handle.env, "env", None)
        if inner_env and "wordle" in inner_env.__class__.__name__.lower():
            await validate_environment_observation(
                _strip_step_metadata(result), "step"
            )

        return _record_step_result(handle, result)

    except Exception as e:
        logger.error("Failed to step environment %s: %s", request.env_id, e)

        # Fallback for Wordle: convert invalid guesses into 'invalid_guess'
        # tool calls and retry once.
        # NOTE(review): this also runs when the first step succeeded but a
        # later stage (e.g. validation) raised, in which case the environment
        # is stepped a second time - confirm that is intended.
        wordle_cls = _wordle_wrapper_class()
        wrapper = handle.env
        if wordle_cls is not None and isinstance(wrapper, wordle_cls):
            try:
                expected_len = int(getattr(wrapper, "word_length", 5))
                normalized = _normalize_wordle_tool_calls(
                    request.tool_calls, expected_len, preserve_tool=False
                )
                # Retry with normalized calls, allowing the wrapper to
                # synthesize an observation.
                result = await wrapper.step(normalized)
                return _record_step_result(handle, result)
            except Exception as fallback_err:
                # Best-effort only: report the original error below, but do
                # not lose the fallback failure entirely.
                logger.warning(
                    "Wordle fallback step failed for environment %s: %s",
                    request.env_id,
                    fallback_err,
                )

        raise HTTPException(
            status_code=500, detail=f"{type(e).__name__}: {e}"
        ) from e
985
+
986
+
987
@router.post("/snapshot", response_model=EnvSnapshotResponse)
async def snapshot_environment(request: EnvSnapshotRequest) -> EnvSnapshotResponse:
    """Serialize a registered environment and persist it as a snapshot.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            serialization, storage or registration fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(
            status_code=404, detail=f"Environment {request.env_id} not found"
        )

    try:
        # Capture the wrapper state, then write it to the storage volume
        # together with the seed the environment was created with.
        serialized = await handle.env.serialize()
        saved = storage.save_snapshot(
            rl_run_id=handle.rl_run_id,
            kind="env",
            state_dict=serialized,
            config={"seed": handle.seed},
        )
        snapshot_id, path, size = saved

        # Make the stored snapshot discoverable through the registry.
        registry.register_snapshot(
            kind="env",
            rl_run_id=handle.rl_run_id,
            size=size,
            path=path,
        )

        response = EnvSnapshotResponse(
            snapshot_id=snapshot_id,
            path=path,
            rl_run_id=handle.rl_run_id,
            size=size,
        )
        return response

    except Exception as e:
        failure = f"Failed to snapshot environment {request.env_id}: {e}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=str(e))
1026
+
1027
+
1028
def _restore_instance_kwargs(
    metadata: Any, initial_engine_snapshot: Any
) -> Dict[str, Any]:
    """Build the TaskInstance keyword arguments shared by all restore paths."""
    return {
        "id": uuid4(),
        "impetus": Impetus(instructions="Restore"),
        "intent": Intent(
            rubric={"goal": "Restore"},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        "metadata": metadata,
        "is_reproducible": True,
        "initial_engine_snapshot": initial_engine_snapshot,
    }


async def _restore_crafter_wrapper(state_dict: Dict[str, Any]) -> Any:
    """Rebuild a Crafter wrapper from a snapshot payload.

    The base environment is recreated from the seed stored under
    ``state_dict["config"]["seed"]`` and the wrapper state is then
    deserialized on top of it.
    """
    seed_value = state_dict["config"]["seed"]
    metadata = CrafterTaskInstanceMetadata(
        difficulty="normal",
        seed=seed_value,
        num_trees_radius=0,
        num_cows_radius=0,
        num_hostiles_radius=0,
    )
    instance = CrafterTaskInstance(**_restore_instance_kwargs(metadata, None))
    base_env = CrafterClassicEnvironment(task_instance=instance)
    return await CrafterEnvironmentWrapper.deserialize(
        payload=state_dict,
        env=base_env,
    )


async def _restore_wordle_wrapper(state_dict: Dict[str, Any]) -> Any:
    """Rebuild a Wordle wrapper from a snapshot payload.

    Uses the stored ``initial_engine_snapshot`` when the config has one;
    otherwise a single-instance taskset is generated with the stored
    ``word_length`` / ``max_guesses`` (defaults 5 and 6).

    Raises:
        HTTPException: 500 when the Wordle modules or wrapper cannot be imported.
    """
    try:
        from synth_ai.environments.examples.wordle.taskset import (
            create_wordle_taskset,
            WordleTaskInstance,
            WordleTaskInstanceMetadata,
        )
        from synth_ai.environments.examples.wordle.environment import (
            WordleEnvironment,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle modules unavailable: {e}"
        )

    cfg = state_dict.get("config", {}) or {}
    word_length = int(cfg.get("word_length", 5))
    max_guesses = int(cfg.get("max_guesses", 6))
    init_snap = cfg.get("initial_engine_snapshot")
    if init_snap is not None:
        metadata = WordleTaskInstanceMetadata(
            word_length=word_length, max_guesses=max_guesses
        )
        instance = WordleTaskInstance(
            **_restore_instance_kwargs(metadata, init_snap)
        )
    else:
        ts = await create_wordle_taskset(
            sample_size=1, word_length=word_length, max_guesses=max_guesses
        )
        instance = ts.instances[0]
    base_env = WordleEnvironment(task_instance=instance)

    # Lazy import of wrapper only when needed
    try:
        from .envs.wordle.environment import (
            WordleEnvironmentWrapper as _WordleWrapper,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Wordle wrapper unavailable: {e}"
        )
    return await _WordleWrapper.deserialize(payload=state_dict, env=base_env)


async def _restore_sokoban_wrapper(state_dict: Dict[str, Any]) -> Any:
    """Rebuild a Sokoban wrapper from a snapshot payload.

    The task instance is built from the stored ``difficulty`` (default
    ``"easy"``) and ``initial_state`` config entries.

    Raises:
        HTTPException: 500 when the Sokoban modules or wrapper cannot be imported.
    """
    try:
        from synth_ai.environments.examples.sokoban.taskset import (
            SokobanTaskInstance,
            SokobanTaskInstanceMetadata,
        )
        from synth_ai.environments.examples.sokoban.environment import (
            SokobanEnvironment,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban modules unavailable: {e}"
        )

    cfg = state_dict.get("config", {}) or {}
    metadata = SokobanTaskInstanceMetadata(
        difficulty=cfg.get("difficulty", "easy")
    )
    instance = SokobanTaskInstance(
        **_restore_instance_kwargs(metadata, cfg.get("initial_state"))
    )
    base_env = SokobanEnvironment(task_instance=instance)

    # Lazy import of wrapper only when needed
    try:
        from .envs.sokoban.environment import (
            SokobanEnvironmentWrapper as _SokobanWrapper,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
        )
    return await _SokobanWrapper.deserialize(payload=state_dict, env=base_env)


@router.post("/restore", response_model=EnvRestoreResponse)
async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
    """Restore an environment from a snapshot and register it as a new env.

    The snapshot payload's ``name`` field (default ``"crafter"``) selects
    which environment type is rebuilt.

    Raises:
        HTTPException: 404 when the snapshot is unknown; 422 when it is not an
            environment snapshot or names an unsupported environment; 500 on
            any other failure while loading or rebuilding.
    """
    snapshot = registry.get_snapshot(request.snapshot_id)
    if not snapshot:
        raise HTTPException(
            status_code=404, detail=f"Snapshot {request.snapshot_id} not found"
        )

    if snapshot.kind != "env":
        raise HTTPException(
            status_code=422,
            detail=f"Snapshot {request.snapshot_id} is not an environment snapshot",
        )

    restorers = {
        "crafter": _restore_crafter_wrapper,
        "wordle": _restore_wordle_wrapper,
        "sokoban": _restore_sokoban_wrapper,
    }

    try:
        # Load snapshot from volume
        state_dict, _meta = storage.load_snapshot(
            rl_run_id=snapshot.rl_run_id,
            kind="env",
            snapshot_id=request.snapshot_id,
        )

        # Recreate environment
        env_name = state_dict.get("name", "crafter")
        restore_wrapper = restorers.get(str(env_name).lower())
        if restore_wrapper is None:
            raise HTTPException(
                status_code=422,
                detail=f"Unknown environment name in snapshot: {env_name}",
            )
        wrapper = await restore_wrapper(state_dict)

        # Register new instance
        env_id = registry.register_env(
            env=wrapper,
            seed=wrapper.seed,
            rl_run_id=snapshot.rl_run_id,
            last_observation=wrapper.last_observation,
            last_info=wrapper.last_info,
        )

        # Update step index
        handle = registry.get_env(env_id)
        if handle:
            handle.step_idx = wrapper.step_idx

        return EnvRestoreResponse(
            env_id=env_id,
            observation=wrapper.last_observation or {},
            info=wrapper.last_info,
            step_idx=wrapper.step_idx,
        )

    except HTTPException as http_err:
        # Deliberate HTTP errors raised above (422 unknown name, 500 modules
        # unavailable) must reach the client unchanged instead of being
        # re-wrapped as a generic 500.
        logger.error(
            "Failed to restore environment from snapshot %s: %s",
            request.snapshot_id,
            http_err.detail,
        )
        raise
    except Exception as e:
        logger.error(
            f"Failed to restore environment from snapshot {request.snapshot_id}: {e}"
        )
        raise HTTPException(status_code=500, detail=str(e)) from e
1249
+
1250
+
1251
@router.post("/terminate", response_model=EnvTerminateResponse)
async def terminate_environment(request: EnvTerminateRequest) -> EnvTerminateResponse:
    """Shut down a registered environment and drop it from the registry.

    Raises:
        HTTPException: 404 when ``request.env_id`` is not registered; 500 when
            terminating the environment or removing it from the registry fails.
    """
    handle = registry.get_env(request.env_id)
    if not handle:
        raise HTTPException(
            status_code=404, detail=f"Environment {request.env_id} not found"
        )

    try:
        # Let the environment release its own resources first; the registry
        # entry is only removed once that has succeeded.
        environment = handle.env
        await environment.terminate()
        registry.remove_env(request.env_id)

        acknowledgement = EnvTerminateResponse(ok=True)
        return acknowledgement

    except Exception as e:
        failure = f"Failed to terminate environment {request.env_id}: {e}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=str(e))