synth-ai 0.2.9.dev2__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (112)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/demo_registry.py +7 -7
  102. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  103. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +54 -0
  104. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  105. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +165 -0
  106. synth_ai/task/apps/__init__.py +54 -13
  107. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  108. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +112 -13
  109. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  110. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  111. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  112. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1 @@
1
+ """Environment implementations."""
@@ -0,0 +1,6 @@
1
+ """Crafter environment and policy implementations."""
2
+
3
+ from .environment import CrafterEnvironmentWrapper
4
+ from .policy import CrafterPolicy
5
+
6
+ __all__ = ["CrafterEnvironmentWrapper", "CrafterPolicy"]
@@ -0,0 +1,429 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional
4
+ import logging
5
+
6
+ from synth_ai.environments.stateful.core import StatefulEnvironment
7
+ from synth_ai.environments.environment.tools import EnvToolCall
8
+
9
+ from ...utils import convert_numpy_to_python
10
+ from .tools import TOOLS_SCHEMA
11
+ from .shared import CRAFTER_ACTIONS, _format_semantic_map_view
12
+
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class CrafterEnvironmentWrapper:
    """Host-side environment wrapper matching the sketch contract.

    Bridges our HTTP routes to a synth-ai `StatefulEnvironment` instance.

    Contract (see sketch.txt):
    - initialize() -> observation dict
    - step(tool_calls) -> observation dict plus optional done/reward/truncated/info
    - snapshot()/restore() handled at route level; this wrapper exposes checkpoint via synth-ai

    State logging and the ``semantic_map_patch7`` attachment are best-effort:
    any failure there is logged at DEBUG level and never interrupts stepping.
    """

    def __init__(self, env: StatefulEnvironment, seed: Optional[int] = None) -> None:
        """Store the wrapped environment and reset the bookkeeping fields."""
        self.env = env
        self.seed = seed
        self.step_idx = 0
        self.last_observation: Optional[Dict[str, Any]] = None
        self.last_info: Optional[Dict[str, Any]] = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _semantic_patch(sem: Any, px: Any, py: Any, size: int = 7) -> List[List[int]]:
        """Return a square patch of ``sem`` centred on ``(px, py)``.

        The patch has ``2 * (size // 2) + 1`` rows and columns. ``sem`` is
        indexed as ``sem[x][y]``; cells outside the map (or any cell when
        ``sem`` has no length) are filled with ``0``. Rows follow ``dy`` and
        columns follow ``dx``.
        """
        half = size // 2
        cx, cy = int(px), int(py)
        x_limit = len(sem) if hasattr(sem, "__len__") else 0
        y_limit = len(sem[0]) if x_limit and hasattr(sem[0], "__len__") else 0
        patch: List[List[int]] = []
        for dy in range(-half, half + 1):
            row: List[int] = []
            for dx in range(-half, half + 1):
                x, y = cx + dx, cy + dy
                inside = 0 <= x < x_limit and 0 <= y < y_limit
                row.append(int(sem[x][y]) if inside else 0)
            patch.append(row)
        return patch

    def _public_state_dict(self) -> Dict[str, Any]:
        """Snapshot the fields of the engine's public state used for logging/diffing."""
        pub = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
        return {
            "inventory": pub.inventory,
            "achievements_status": pub.achievements_status,
            "player_position": list(pub.player_position),
            "player_direction": pub.player_direction,
            "semantic_map": pub.semantic_map,
        }

    def _package(self, obs: Any) -> Dict[str, Any]:
        """Convert an environment observation into the JSON-friendly response shape."""
        observation = getattr(obs, "observation", obs)  # tolerate dict-like
        info = getattr(obs, "info", None)
        return {
            "observation": convert_numpy_to_python(observation),
            "info": convert_numpy_to_python(info) if info else None,
            "step_idx": self.step_idx,
        }

    def _normalize_tool_calls(
        self, tool_calls: List[Dict[str, Any]] | List[EnvToolCall]
    ) -> List[EnvToolCall]:
        """Translate heterogeneous tool calls into ``interact`` calls.

        The underlying environment expects only tool="interact" with
        args={"action": <action>}. The LLM may emit:
        - interact_many with {actions: [...]}
        - direct tool names like "make_wood_pickaxe" or "do"
        - or even tool_name "do" with arguments {"action": "make_wood_pickaxe"}
        Unknown actions are skipped with a warning, so the result may be empty.

        Raises:
            ValueError: if a dict tool call carries neither "tool" nor "tool_name".
        """
        import json  # function-scope: this module has no top-level json import

        allowed_actions = set(
            TOOLS_SCHEMA[0]["function"]["parameters"]["properties"]["actions"]["items"]["enum"]
        )
        normalized: List[EnvToolCall] = []

        def _action_to_int(action: Any) -> Optional[int]:
            # Handle invalid actions gracefully instead of failing
            if isinstance(action, int):
                return action
            action_str = str(action)
            if action_str not in CRAFTER_ACTIONS:
                logger.warning("Unknown Crafter action: %s - ignoring", action_str)
                return None  # Signal to skip this action
            return CRAFTER_ACTIONS[action_str]

        def _emit(action: Any) -> None:
            action_int = _action_to_int(action)
            if action_int is not None:  # Skip invalid actions
                normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))

        def _as_list(value: Any) -> List[Any]:
            # A bare string/scalar is treated as one action rather than being
            # iterated character by character (or raising for non-iterables).
            if not value:
                return []
            if isinstance(value, (str, bytes)) or not hasattr(value, "__iter__"):
                return [value]
            return list(value)

        for tc in tool_calls:
            if isinstance(tc, EnvToolCall):
                tc_args = tc.args if isinstance(tc.args, dict) else {}
                if tc.tool == "interact_many":
                    for action in _as_list(tc_args.get("actions")):
                        _emit(action)
                elif tc.tool != "interact":
                    # Coerce non-interact tools into interact(action=tool)
                    candidate = tc_args.get("action")
                    _emit(candidate if candidate in allowed_actions else tc.tool)
                else:
                    normalized.append(tc)
                continue

            # Dict input: handle both "tool" and "tool_name" keys
            tool_name = tc.get("tool") or tc.get("tool_name")
            if not tool_name:
                raise ValueError(f"Tool call missing tool name: {tc}")
            # Extract/parse args (may be JSON string from some clients)
            args = tc.get("arguments") or tc.get("args") or {}
            if isinstance(args, str):
                try:
                    args = json.loads(args)
                except (ValueError, RecursionError):
                    logger.warning("Unparseable arguments for tool %s - ignoring them", tool_name)
                    args = {}
            if not isinstance(args, dict):
                # e.g. a JSON list/number: nothing usable, fall back to the tool name
                args = {}

            if tool_name == "interact_many":
                for action in _as_list(args.get("actions")):
                    _emit(action)
                continue

            # Support a packed list of actions under 'actions' for convenience.
            packed = args.get("actions")
            if isinstance(packed, list) and packed:
                for action in packed:
                    _emit(action)
                continue

            candidate = args.get("action")
            if isinstance(candidate, int) or (
                isinstance(candidate, str) and candidate in allowed_actions
            ):
                _emit(candidate)
            else:
                # Fallback: interpret the tool name itself as the action label
                _emit(tool_name)

        return normalized

    @staticmethod
    def _log_changes(before: Dict[str, Any], after: Dict[str, Any]) -> List[str]:
        """Log position/inventory/achievement deltas; return newly unlocked achievements."""
        pb = before.get("player_position", [0, 0])
        pa = after.get("player_position", [0, 0])
        pb_t = (int(pb[0]), int(pb[1])) if isinstance(pb, (list, tuple)) else (0, 0)
        pa_t = (int(pa[0]), int(pa[1])) if isinstance(pa, (list, tuple)) else (0, 0)
        delta = (pa_t[0] - pb_t[0], pa_t[1] - pb_t[1])

        inv_b = before.get("inventory", {}) or {}
        inv_a = after.get("inventory", {}) or {}
        changed_items = []
        for key in sorted(set(inv_b.keys()) | set(inv_a.keys())):
            vb = int(inv_b.get(key, 0) or 0)
            va = int(inv_a.get(key, 0) or 0)
            if vb != va:
                changed_items.append(f"{key}:{vb}->{va}(Δ{va - vb})")
        inv_changes = ", ".join(changed_items) if changed_items else "none"

        ach_b = {k for k, v in (before.get("achievements_status", {}) or {}).items() if v}
        ach_a = {k for k, v in (after.get("achievements_status", {}) or {}).items() if v}
        ach_added = sorted(ach_a - ach_b)
        ach_removed = sorted(ach_b - ach_a)

        logger.info(
            "Changes: pos %s->%s Δ=%s | inv %s | ach +%s -%s",
            pb_t,
            pa_t,
            delta,
            inv_changes,
            ach_added,
            ach_removed,
        )
        return ach_added

    def _log_step_summary(self, normalized: List[EnvToolCall]) -> None:
        """Log action frequencies and achievement statistics for the executed calls."""
        from collections import Counter

        # Build reverse action map for readability
        int_to_action = {v: k for k, v in CRAFTER_ACTIONS.items()}
        action_ids = []
        for tc in normalized:
            if isinstance(tc, EnvToolCall) and isinstance(tc.args, dict):
                a = tc.args.get("action")
                if isinstance(a, int):
                    action_ids.append(a)
        action_freq = Counter(int_to_action.get(a, str(a)) for a in action_ids)

        # Public achievements after step
        pub_after = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
        ach_freq = Counter(name for name, on in pub_after.achievements_status.items() if on)

        # Private achievement values (means)
        priv_after = self.env.engine._get_private_state_from_env(0.0, False, False)  # type: ignore[attr-defined]
        values = list((priv_after.achievements_current_values or {}).values())
        mean_all = (sum(values) / len(values)) if values else 0.0
        nonzero = [v for v in values if v]
        mean_nonzero = (sum(nonzero) / len(nonzero)) if nonzero else 0.0

        logger.info(
            "Step summary: seed=%s | actions=%s | achievements=%s | mean_ach_all=%.3f mean_ach_nonzero=%.3f",
            str(self.seed),
            dict(action_freq),
            dict(ach_freq),
            mean_all,
            mean_nonzero,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def initialize(self) -> Dict[str, Any]:
        """Initialize the environment and return the first observation payload.

        Returns a dict with "observation", "info" (or None) and "step_idx".
        """
        obs = await self.env.initialize()
        # synth-ai InternalObservation expected to expose .observation (dict-like)
        self.step_idx = 0
        self.last_observation = getattr(obs, "observation", obs)  # tolerate dict-like
        self.last_info = getattr(obs, "info", None)
        out_obs: Dict[str, Any] = convert_numpy_to_python(self.last_observation) or {}
        # Attach a 7x7 semantic map patch centered on player for client-side rendering
        try:
            pub = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
            px, py = list(pub.player_position)
            patch = self._semantic_patch(pub.semantic_map, px, py)
            if isinstance(out_obs, dict):
                out_obs["semantic_map_patch7"] = patch
        except Exception:
            logger.debug("Could not attach semantic map patch on initialize", exc_info=True)
        return {
            "observation": out_obs,
            "info": convert_numpy_to_python(self.last_info) if self.last_info else None,
            "step_idx": self.step_idx,
        }

    async def step(self, tool_calls: List[Dict[str, Any]] | List[EnvToolCall]) -> Dict[str, Any]:
        """Execute the given tool calls and return the resulting observation payload.

        Tool calls are normalized into ``interact`` calls and executed one after
        another; if none are valid a single noop (action 0) is executed.

        Returns a dict with "observation", "step_idx" and "done", plus "info",
        "reward" and "truncated" when available.

        Raises:
            ValueError: if a dict tool call has no tool name.
        """
        normalized = self._normalize_tool_calls(tool_calls)

        # Ensure we have at least one valid action; default to noop if none provided
        if not normalized:
            logger.info("No valid actions provided, defaulting to noop")
            normalized.append(EnvToolCall(tool="interact", args={"action": 0}))  # noop action

        # Pre-step logging: capture current public state and print concise summary
        before_state: Optional[Dict[str, Any]] = None
        try:
            before_state = self._public_state_dict()
            actions_printable = [
                (tc.args.get("action") if isinstance(tc.args, dict) else None)
                if isinstance(tc, EnvToolCall)
                else None
                for tc in normalized
            ]
            logger.info(
                "Crafter BEFORE seed=%s step_idx=%s pos=%s inv=%s ach=%s actions=%s",
                str(self.seed),
                self.step_idx,
                before_state.get("player_position"),
                {k: v for k, v in before_state["inventory"].items() if v},
                [k for k, v in before_state["achievements_status"].items() if v],
                actions_printable,
            )
            logger.info(
                "Surroundings BEFORE (seed=%s):\n%s",
                str(self.seed),
                _format_semantic_map_view(before_state),
            )
        except Exception:
            # Logging should not interfere with stepping
            logger.debug("Pre-step state logging failed", exc_info=True)

        # Execute actions sequentially so multi-action tool calls actually advance the world.
        # NOTE(review): only the final step's reward/done/info are reported; values from
        # intermediate steps are overwritten - confirm this is intended.
        last_obs: Any = None
        for single_call in normalized:
            last_obs = await self.env.step(single_call)
            self.step_idx += 1

        observation = getattr(last_obs, "observation", last_obs)
        info = getattr(last_obs, "info", None)
        done = getattr(last_obs, "done", False)
        reward = getattr(last_obs, "reward", None)
        truncated = getattr(last_obs, "truncated", None)
        self.last_observation = observation
        self.last_info = info

        # Post-step logging: capture new public state and print concise summary
        after_state: Optional[Dict[str, Any]] = None
        ach_added_latest: Optional[List[str]] = None
        try:
            after_state = self._public_state_dict()
            logger.info(
                "Crafter AFTER seed=%s step_idx=%s pos=%s inv=%s ach=%s done=%s reward=%s",
                str(self.seed),
                self.step_idx,
                after_state.get("player_position"),
                {k: v for k, v in after_state["inventory"].items() if v},
                [k for k, v in after_state["achievements_status"].items() if v],
                bool(done),
                reward,
            )
            if before_state is not None:
                try:
                    ach_added_latest = self._log_changes(before_state, after_state)
                except Exception:
                    logger.debug("Could not compute state changes", exc_info=True)
                # Reward shaping immediately so logs and response reflect it
                if reward is None and ach_added_latest:
                    reward = float(len(ach_added_latest))
                    logger.info(
                        "Reward shaping applied: +%s (achievements added)",
                        len(ach_added_latest),
                    )
            logger.info(
                "Surroundings AFTER (seed=%s):\n%s",
                str(self.seed),
                _format_semantic_map_view(after_state),
            )
        except Exception:
            logger.debug("Post-step state logging failed", exc_info=True)

        result: Dict[str, Any] = {
            "observation": convert_numpy_to_python(observation),
            "step_idx": self.step_idx,
            "done": bool(done),  # Ensure boolean (None -> False)
        }

        # Attach a 7x7 semantic map patch centered on player for client-side rendering
        if after_state is not None:
            try:
                pos = after_state.get("player_position") or [0, 0]
                patch = self._semantic_patch(after_state.get("semantic_map"), pos[0], pos[1])
                obs_out = result.get("observation")
                if isinstance(obs_out, dict):
                    obs_out["semantic_map_patch7"] = patch
            except Exception:
                logger.debug("Could not attach semantic map patch on step", exc_info=True)

        result_info = convert_numpy_to_python(info) if info is not None else {}
        # Attach achievements delta for downstream metrics if useful
        if ach_added_latest is not None:
            if not isinstance(result_info, dict):
                result_info = {"_raw_info": result_info}
            result_info["achievements_added"] = ach_added_latest
        if result_info:
            result["info"] = result_info

        if reward is not None:
            result["reward"] = convert_numpy_to_python(reward)
            # Also expose last-step reward inside observation for stepwise consumers
            obs_out = result.get("observation")
            if isinstance(obs_out, dict):
                obs_out.setdefault("reward_last_step", convert_numpy_to_python(reward))
        if truncated is not None:
            result["truncated"] = truncated

        # Aggregated step summary: action frequencies and achievement stats
        try:
            self._log_step_summary(normalized)
        except Exception:
            logger.debug("Step summary logging failed", exc_info=True)
        return result

    async def checkpoint(self) -> Dict[str, Any]:
        """Checkpoint the underlying environment and return its observation payload."""
        return self._package(await self.env.checkpoint())

    async def terminate(self) -> Dict[str, Any]:
        """Terminate the underlying environment and return its final observation payload."""
        return self._package(await self.env.terminate())

    def state_dict(self) -> Dict[str, Any]:
        """Return the wrapper's own bookkeeping state (not the environment's)."""
        return {
            "seed": self.seed,
            "step_idx": self.step_idx,
            "last_observation": self.last_observation,
            "last_info": self.last_info,
        }

    def load_state_dict(self, state: Dict[str, Any]) -> None:
        """Restore bookkeeping state produced by :meth:`state_dict`.

        Raises:
            KeyError: if a required key is missing from ``state``.
        """
        self.seed = state["seed"]
        self.step_idx = int(state["step_idx"])
        self.last_observation = state["last_observation"]
        self.last_info = state["last_info"]

    async def serialize(self) -> Dict[str, Any]:
        """Return a payload with the env name, its seed config and the wrapper state."""
        return {
            "name": "crafter",
            "config": {"seed": self.seed},
            "state": self.state_dict(),
        }

    @classmethod
    async def deserialize(
        cls,
        payload: Dict[str, Any],
        env: StatefulEnvironment,
    ) -> "CrafterEnvironmentWrapper":
        """Rebuild a wrapper around ``env`` from a payload produced by :meth:`serialize`."""
        seed = payload["config"]["seed"]
        wrapper = cls(env=env, seed=seed)
        wrapper.load_state_dict(payload["state"])
        return wrapper
427
+
428
+
429
+ __all__ = ["CrafterEnvironmentWrapper"]