synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,775 @@
1
+ from fastapi import APIRouter, HTTPException, Body
2
+ from uuid import uuid4
3
+ from typing import Dict, Any, List, Optional
4
+ from types import SimpleNamespace
5
+ from pydantic import BaseModel
6
+ import os
7
+ import json
8
+ import pickle
9
+ import base64
10
+ import numpy as np
11
+ import tempfile
12
+ from dataclasses import dataclass
13
+
14
+ from synth_ai.environments.service.registry import get_environment_cls, list_supported_env_types
15
+ from synth_ai.environments.stateful.core import StatefulEnvironment
16
+ from synth_ai.environments.environment.tools import EnvToolCall
17
+
18
# Optional Redis backend: used for persistent storage only when the client
# library can be imported.
try:
    import redis.asyncio as aioredis

    REDIS_AVAILABLE = True
    redis_client = aioredis.from_url(
        os.getenv("REDIS_URL", "redis://localhost:6379"),
        encoding="utf-8",
        decode_responses=False,  # binary mode is needed for pickle payloads
    )
except ImportError:
    REDIS_AVAILABLE, redis_client = False, None

# Global toggle that disables Redis entirely.
# The default is *in-memory* only; set SYNTH_USE_INMEM=0 to enable Redis
# (when it is available).
if os.getenv("SYNTH_USE_INMEM", "1") == "1":
    REDIS_AVAILABLE, redis_client = False, None

api_router = APIRouter()

# In-memory store of live environments keyed by environment id; serves as the
# fallback when Redis is not available.
instances: Dict[str, StatefulEnvironment] = {}
44
+
45
+
46
+ # Environment-specific task instance creation
47
@dataclass
class MinimalTaskInstanceMetadata:
    """Field-less metadata placeholder for environments that require a metadata object."""
52
+
53
+
54
@dataclass
class MinimalIntent:
    """Bare-bones intent record for environments that expect one.

    ``gold_state_diff`` and ``deterministic_eval_functions`` default to None
    and are replaced in ``__post_init__`` with a fresh empty dict / list, so
    instances never share a mutable default.
    """

    rubric: Dict[str, Any]
    gold_trajectories: Optional[Any] = None
    gold_state_diff: Dict = None
    deterministic_eval_functions: list = None

    def __post_init__(self):
        # Swap each unset (None) container field for a new empty container.
        for attr_name, make_empty in (
            ("gold_state_diff", dict),
            ("deterministic_eval_functions", list),
        ):
            if getattr(self, attr_name) is None:
                setattr(self, attr_name, make_empty())
68
+
69
+
70
@dataclass
class MinimalImpetus:
    """Bare-bones impetus record for environments that expect one."""

    # Instruction text handed to the agent.
    instructions: str
75
+
76
+
77
def create_task_instance_for_environment(
    env_name: str,
    initial_state: Optional[Dict[str, Any]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Any:
    """Create the task-instance object a given environment type expects.

    Args:
        env_name: Environment type name. "Sokoban", "CrafterClassic",
            "MiniGrid", "TicTacToe", "Verilog", "NetHack" and "Enron" get
            dedicated handling; any other name gets a namespace carrying only
            ``initial_engine_snapshot``.
        initial_state: Optional engine snapshot stored on the task as
            ``initial_engine_snapshot``. It is never mutated by this function.
        config: Optional settings. Only consulted for "MiniGrid", where the
            "seed" and "env_name" keys (when present) are added to the
            snapshot.

    Returns:
        A ``SimpleNamespace`` with the attributes required by the environment.
    """
    if env_name in ("Sokoban", "CrafterClassic", "MiniGrid", "TicTacToe"):
        # These environments only need the engine snapshot (empty dict if unset).
        snapshot = initial_state or {}

        if env_name == "MiniGrid" and config:
            # MiniGrid supports seed-based selection and an explicit
            # environment name, both carried inside the snapshot.
            overrides = {key: config[key] for key in ("seed", "env_name") if key in config}
            if overrides:
                # Build a new dict so the caller's initial_state is not modified.
                snapshot = {**snapshot, **overrides}

        return SimpleNamespace(initial_engine_snapshot=snapshot)

    if env_name == "Verilog":
        # Verilog needs a snapshot_dir attribute; a fresh temporary directory
        # is created for it (it is not removed here).
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            snapshot_dir=tempfile.mkdtemp(prefix="verilog_task_"),
            metadata=MinimalTaskInstanceMetadata(),
            id=uuid4(),
        )

    if env_name == "NetHack":
        # NetHack needs a fuller task structure with its own metadata type.
        from synth_ai.environments.examples.nethack.taskset import NetHackTaskInstanceMetadata

        metadata = NetHackTaskInstanceMetadata(
            character_role="tourist",  # Easy starting character
            starting_level=1,
            target_depth=3,
            time_limit=1000,
            difficulty="tutorial",
            special_objectives=["Explore at least 3 different dungeon levels"],
            seed=42,
        )
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            metadata=metadata,
            id=uuid4(),
            intent=MinimalIntent(rubric={"success": "reach target depth"}),
            impetus=MinimalImpetus(instructions="Play NetHack and achieve the highest score."),
            is_reproducible=False,
        )

    if env_name == "Enron":
        # Enron tasks carry question/answer/email data; fall back to defaults
        # when no initial state (or an empty one) is supplied.
        enron_state = initial_state or {}
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            metadata=MinimalTaskInstanceMetadata(),
            id=uuid4(),
            question=enron_state.get("question", "What information can you find?"),
            answer=enron_state.get("answer", ""),
            emails=enron_state.get("emails", []),
        )

    # Default: unknown environments get a namespace with just the snapshot.
    return SimpleNamespace(initial_engine_snapshot=initial_state)
155
+
156
+
157
def _new_task_from_serialized(serialized_data: Dict[str, Any]) -> SimpleNamespace:
    """Create a task namespace holding the id and engine snapshot from *serialized_data*."""
    from uuid import UUID  # only uuid4 is imported at module level

    task = SimpleNamespace()
    # A missing "id" yields a freshly generated UUID.
    task.id = UUID(serialized_data.get("id", str(uuid4())))
    task.initial_engine_snapshot = serialized_data.get("initial_engine_snapshot", {})
    return task


def _attach_optional_impetus_and_intent(
    task: SimpleNamespace, serialized_data: Dict[str, Any]
) -> None:
    """Set namespace-based ``impetus`` / ``intent`` on *task* when the payload has non-empty entries."""
    impetus_data = serialized_data.get("impetus", {})
    if impetus_data:
        task.impetus = SimpleNamespace(instructions=impetus_data.get("instructions", ""))

    intent_data = serialized_data.get("intent", {})
    if intent_data:
        task.intent = SimpleNamespace(
            rubric=intent_data.get("rubric", ""),
            gold_trajectories=intent_data.get("gold_trajectories", []),
            gold_state_diff=intent_data.get("gold_state_diff", {}),
        )


def _reconstruct_nethack_task(serialized_data: Dict[str, Any]) -> SimpleNamespace:
    """Rebuild a NetHack task; impetus and intent are always set, using defaults when absent."""
    from synth_ai.environments.examples.nethack.taskset import NetHackTaskInstanceMetadata

    metadata_data = serialized_data.get("metadata", {})
    metadata = NetHackTaskInstanceMetadata(
        character_role=metadata_data.get("character_role", "tourist"),
        starting_level=metadata_data.get("starting_level", 1),
        target_depth=metadata_data.get("target_depth", 3),
        time_limit=metadata_data.get("time_limit", 1000),
        difficulty=metadata_data.get("difficulty", "tutorial"),
        special_objectives=metadata_data.get(
            "special_objectives", ["Explore at least 3 different dungeon levels"]
        ),
        seed=metadata_data.get("seed", 42),
    )

    task = _new_task_from_serialized(serialized_data)
    task.metadata = metadata

    default_instructions = "Play NetHack and achieve the highest score."
    impetus_data = serialized_data.get("impetus", {})
    if impetus_data:
        task.impetus = MinimalImpetus(
            instructions=impetus_data.get("instructions", default_instructions)
        )
    else:
        task.impetus = MinimalImpetus(instructions=default_instructions)

    intent_data = serialized_data.get("intent", {})
    if intent_data:
        task.intent = MinimalIntent(
            rubric=intent_data.get("rubric", {"success": "reach target depth"}),
            gold_trajectories=intent_data.get("gold_trajectories", []),
            gold_state_diff=intent_data.get("gold_state_diff", {}),
        )
    else:
        task.intent = MinimalIntent(rubric={"success": "reach target depth"})

    # Unlike the other environments, NetHack defaults to non-reproducible.
    task.is_reproducible = serialized_data.get("is_reproducible", False)
    return task


async def reconstruct_task_instance_from_serialized(
    env_name: str, serialized_data: Dict[str, Any]
) -> Any:
    """Reconstruct a task instance from serialized data for a specific environment type.

    Args:
        env_name: Environment type name. "MiniGrid" and "Sokoban" delegate to
            their own ``TaskInstance.deserialize``; "NetHack", "Verilog",
            "Enron", "CrafterClassic" and "TicTacToe" get environment-specific
            attributes; any other name gets the generic namespace.
        serialized_data: Mapping produced by serializing a task instance.
            Missing keys fall back to defaults (a new UUID for "id", an empty
            dict for "initial_engine_snapshot").

    Returns:
        The deserialized task instance (a ``SimpleNamespace`` except for
        MiniGrid and Sokoban).

    Raises:
        ValueError: If "id" is present but is not a valid UUID string
            (raised by ``uuid.UUID``).
    """
    if env_name == "MiniGrid":
        # MiniGrid has its own TaskInstance class with a deserialize method.
        from synth_ai.environments.examples.minigrid.taskset import MiniGridTaskInstance

        return await MiniGridTaskInstance.deserialize(serialized_data)

    if env_name == "Sokoban":
        # Sokoban has its own TaskInstance class with a deserialize method.
        from synth_ai.environments.examples.sokoban.taskset import SokobanTaskInstance

        return await SokobanTaskInstance.deserialize(serialized_data)

    if env_name == "NetHack":
        return _reconstruct_nethack_task(serialized_data)

    task = _new_task_from_serialized(serialized_data)

    if env_name in ("CrafterClassic", "TicTacToe"):
        # Metadata is restored as a plain namespace of whatever was serialized.
        task.metadata = SimpleNamespace(**serialized_data.get("metadata", {}))
    elif env_name == "Verilog":
        # Verilog additionally needs a snapshot_dir; a new temp directory is created.
        task.metadata = MinimalTaskInstanceMetadata()
        task.snapshot_dir = tempfile.mkdtemp(prefix="verilog_task_")
    elif env_name == "Enron":
        task.metadata = MinimalTaskInstanceMetadata()
        # Enron-specific fields
        task.question = serialized_data.get("question", "What information can you find?")
        task.answer = serialized_data.get("answer", "")
        task.emails = serialized_data.get("emails", [])
    # Unknown environments keep just the id and snapshot (no metadata attribute).

    _attach_optional_impetus_and_intent(task, serialized_data)
    task.is_reproducible = serialized_data.get("is_reproducible", True)
    return task
344
+
345
+
346
+ # Storage abstraction
347
class InstanceStorage:
    """Two-tier store for live environment instances.

    The module-level ``instances`` dict is always written and always consulted
    first. Redis is used as a best-effort mirror whenever ``REDIS_AVAILABLE``
    and ``redis_client`` are both truthy; Redis errors are printed, never raised.
    """

    async def store(self, env_id: str, env: StatefulEnvironment):
        """Save ``env`` under ``env_id`` in memory and, when possible, in Redis."""
        # The in-memory write is unconditional so a Redis failure cannot lose the instance.
        instances[env_id] = env

        if not (REDIS_AVAILABLE and redis_client):
            print(f"✅ Stored environment {env_id} in-memory (Redis not available)")
            return

        try:
            # The pickled bytes are base64-encoded so they can be stored as text.
            serialized = base64.b64encode(pickle.dumps(env)).decode("utf-8")
            await redis_client.set(f"env_instance:{env_id}", serialized, ex=3600)  # 1 hour TTL
            print(f"✅ Stored environment {env_id} in Redis + in-memory")
        except Exception as e:
            print(f"⚠️ Redis storage failed, using in-memory fallback: {e}")

    async def get(self, env_id: str) -> Optional[StatefulEnvironment]:
        """Return the instance stored under ``env_id``, or ``None`` if no tier has it."""
        # Memory is checked first; Redis is only a fallback.
        if env_id in instances:
            print(f"✅ Retrieved environment {env_id} from in-memory store")
            return instances[env_id]

        if REDIS_AVAILABLE and redis_client:
            try:
                serialized = await redis_client.get(f"env_instance:{env_id}")
                if serialized:
                    # NOTE(review): pickle.loads executes whatever the payload encodes, so
                    # this trusts every writer of the Redis key - confirm Redis is private.
                    env = pickle.loads(base64.b64decode(serialized))
                    print(f"✅ Retrieved environment {env_id} from Redis (restored to memory)")
                    # Cache the restored instance so the next lookup is served from memory.
                    instances[env_id] = env
                    return env
            except Exception as e:
                print(f"⚠️ Redis retrieval failed: {e}")

        print(f"❌ Environment {env_id} not found in either store")
        return None

    async def remove(self, env_id: str) -> Optional[StatefulEnvironment]:
        """Delete ``env_id`` from both tiers and return the instance that was stored."""
        # Fetch first: get() may restore a Redis-only instance into memory,
        # which the pop() below then removes again.
        env = await self.get(env_id)
        removed_env = instances.pop(env_id, None)

        if not (REDIS_AVAILABLE and redis_client):
            print(f"✅ Removed environment {env_id} from in-memory")
        else:
            try:
                await redis_client.delete(f"env_instance:{env_id}")
                print(f"✅ Removed environment {env_id} from both Redis and in-memory")
            except Exception as e:
                print(f"⚠️ Redis removal failed, removed from in-memory: {e}")

        return env or removed_env


# Global storage instance
storage = InstanceStorage()
414
+
415
+
416
def convert_numpy_types(obj):
    """Convert numpy types to native Python types for JSON serialization.

    Containers (dict, list, tuple) are rebuilt recursively. numpy integer,
    floating and bool scalars become ``int`` / ``float`` / ``bool``; arrays
    become nested lists via ``ndarray.tolist()``. Dataclass instances and other
    objects exposing ``__dict__`` are flattened into plain dicts. Anything else
    is returned unchanged.

    Args:
        obj: Arbitrary value, typically an observation or response payload.

    Returns:
        A structure with the same shape as ``obj`` in which the numpy values
        listed above have been replaced by native Python values.
    """
    import numpy as np
    from dataclasses import fields, is_dataclass

    if isinstance(obj, dict):
        # Keys are unwrapped too: json.dumps rejects numpy scalar keys such as
        # np.int64 even when every value has already been converted.
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_types(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_numpy_types(item) for item in obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)

    # is_dataclass() is also true for dataclass *classes*; only instances carry
    # field values, so class objects fall through and are returned unchanged.
    if is_dataclass(obj) and not isinstance(obj, type):
        # Prefer the object's own serializer when it provides one.
        if hasattr(obj, "to_dict"):
            return obj.to_dict()
        attributes = getattr(obj, "__dict__", None)
        if attributes is None:
            # Slotted dataclasses have no __dict__; read the declared fields instead.
            attributes = {f.name: getattr(obj, f.name) for f in fields(obj)}
        return {key: convert_numpy_types(value) for key, value in attributes.items()}

    if hasattr(obj, "__dict__") and not isinstance(obj, type):
        # Generic objects: flatten small ones, stringify anything large or
        # anything that blows up while being walked.
        try:
            if len(obj.__dict__) < 50:  # Avoid overly complex objects
                return {key: convert_numpy_types(value) for key, value in obj.__dict__.items()}
            return str(obj)  # Fallback to string representation
        except (RecursionError, AttributeError):
            return str(obj)  # Safe fallback

    return obj
466
+
467
+
468
+ # Request/Response models for better API documentation
469
class InitializeRequest(BaseModel):
    # Body of POST /env/{env_name}/initialize; every field is optional.

    # Passed to create_task_instance_for_environment when no task_instance is given.
    initial_state: Optional[Dict[str, Any]] = None
    config: Optional[Dict[str, Any]] = None
    # Serialized task instance; when truthy it is rebuilt with
    # reconstruct_task_instance_from_serialized and the two fields above are not used.
    task_instance: Optional[Dict[str, Any]] = None
473
+
474
+
475
class StepRequest(BaseModel):
    # Body of POST /env/{env_name}/step.

    # Identifier returned by the initialize endpoint.
    env_id: str
    # Optional tag used in the step log lines; step_env generates one when it is missing.
    request_id: Optional[str] = None
    # Action payload; step_env reads its "tool_calls" entry.
    action: Dict[str, Any]
479
+
480
+
481
class TerminateRequest(BaseModel):
    # Body of POST /env/{env_name}/terminate.

    # Identifier of the environment instance to remove and terminate.
    env_id: str
483
+
484
+
485
@api_router.get("/health")
async def get_health():
    # Liveness probe: a fixed "ok" status plus whatever list_supported_env_types() reports.
    payload = {"status": "ok"}
    payload["supported_environments"] = list_supported_env_types()
    return payload
488
+
489
+
490
@api_router.post("/env/{env_name}/initialize")
async def initialize_env(env_name: str, request: InitializeRequest = Body(...)) -> Dict[str, Any]:
    """Initialize a new environment instance."""
    # Flow: resolve class -> obtain task instance -> construct -> initialize -> store -> reply.
    # Every failure is printed with its traceback and reported to the client as HTTP 400.
    import traceback

    try:
        print(f"🔍 Initializing {env_name} environment...")

        cls = get_environment_cls(env_name)
        print(f"✅ Got environment class: {cls}")

        # A caller-supplied serialized task takes precedence; otherwise one is
        # built from the optional initial_state / config.
        if not request.task_instance:
            print("🔍 Creating new task instance...")
            task = create_task_instance_for_environment(
                env_name, request.initial_state, request.config
            )
            print(f"✅ Created task instance: {type(task)}")
        else:
            print("🔍 Using provided task_instance...")
            task = await reconstruct_task_instance_from_serialized(env_name, request.task_instance)
            print(f"✅ Reconstructed task instance: {type(task)}")

        # This is where recursion might happen for Sokoban
        print("🔍 Creating environment instance...")
        env = cls(task)
        print("✅ Created environment instance")

        env_id = str(uuid4())
        print(f"✅ Generated env_id: {env_id}")

        # Producing the first observation might also cause recursion
        print("🔍 Calling env.initialize()...")
        obs = await env.initialize()
        print(f"✅ Environment initialized, observation type: {type(obs)}")

        # Only a fully initialized environment is stored (fixes Redis initialization bug)
        print("🔍 Storing environment...")
        await storage.store(env_id, env)
        print("✅ Environment stored")

        # The observation may hold numpy values that JSON cannot encode
        print("🔍 Converting numpy types...")
        obs_serializable = convert_numpy_types(obs)
        print("✅ Numpy types converted")

        return {"env_id": env_id, "observation": obs_serializable, "done": False, "info": {}}

    except RecursionError as e:
        # Recursion errors get their own log banner and response detail
        stack_trace = traceback.format_exc()
        print(f"❌ RECURSION ERROR in {env_name} initialization:")
        print(stack_trace)
        raise HTTPException(
            status_code=400, detail=f"Recursion error during {env_name} initialization: {str(e)}"
        )

    except Exception as e:
        # Everything else is logged the same way under a generic banner
        stack_trace = traceback.format_exc()
        print(f"❌ ERROR in {env_name} initialization:")
        print(stack_trace)
        raise HTTPException(
            status_code=400, detail=f"Error during {env_name} initialization: {str(e)}"
        )
557
+
558
+
559
@api_router.post("/env/{env_name}/step")
async def step_env(env_name: str, request: StepRequest = Body(...)) -> Dict[str, Any]:
    """Execute a step in the environment."""
    import sys
    import traceback
    import uuid as uuid_module

    def log(message: str) -> None:
        # All step diagnostics go to stderr.
        print(message, file=sys.stderr)

    # Use provided request_id or generate a short one for correlating log lines
    request_id = request.request_id or str(uuid_module.uuid4())[:8]
    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: request_id = {request_id}")
    log(f"\n🌐 ENVIRONMENTS SERVICE {request_id}: step_env HTTP endpoint called")
    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: env_name = {env_name}")
    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: env_id = {request.env_id}")
    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: action = {request.action}")

    # Log call stack to see where this HTTP request comes from
    stack = traceback.format_stack()
    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Call stack (last 3 frames):")
    for frame in stack[-3:]:
        log(f" {frame.strip()}")

    log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to retrieve environment from storage")
    env = await storage.get(request.env_id)
    if not env:
        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Environment not found!")
        raise HTTPException(
            status_code=404, detail=f"Environment instance {request.env_id} not found"
        )

    try:
        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to extract tool calls from action")
        raw_tool_calls = request.action.get("tool_calls", [])
        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Extracted raw_tool_calls = {raw_tool_calls}")

        # Dict entries become EnvToolCall objects; anything else is passed through untouched.
        tool_calls = [
            EnvToolCall(tool=call_dict.get("tool", ""), args=call_dict.get("args", {}))
            if isinstance(call_dict, dict)
            else call_dict
            for call_dict in raw_tool_calls
        ]
        log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Converted to EnvToolCall objects: {tool_calls}"
        )

        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to call env.step()")
        result = await env.step(tool_calls)
        log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: env.step() completed, result type = {type(result)}"
        )

        # Write the stepped environment back so the stored copy reflects the new state
        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to store environment back to storage")
        await storage.store(request.env_id, env)
        log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Environment stored successfully")

        # StatefulEnvironment.step() returns the observation dict directly, not a
        # dict with 'observation', 'reward', 'done', 'info' keys, so those are derived here.
        terminated = result.get("terminated", False)
        truncated = result.get("truncated", False)
        response = {
            "observation": result,  # result IS the observation
            "reward": result.get("reward_last", None),  # Try to get reward from obs
            "done": terminated or truncated,
            "info": {
                "terminated": terminated,
                "truncated": truncated,
            },
        }

        # Convert numpy types to Python types for JSON serialization
        response_serializable = convert_numpy_types(response)

        log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Returning response with keys: {list(response_serializable.keys())}"
        )
        return response_serializable
    except Exception as e:
        log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Exception during step: {type(e).__name__} - {e}"
        )
        raise HTTPException(status_code=400, detail=str(e))
689
+
690
+
691
@api_router.post("/env/{env_name}/terminate")
async def terminate_env(env_name: str, request: TerminateRequest = Body(...)) -> Dict[str, Any]:
    """Terminate an environment instance."""
    # The instance is taken out of storage first; an unknown id is reported as 404.
    env = await storage.remove(request.env_id)
    if not env:
        raise HTTPException(
            status_code=404, detail=f"Environment instance {request.env_id} not found"
        )

    try:
        # terminate() yields a final observation, made JSON-safe before returning
        observation_serializable = convert_numpy_types(await env.terminate())
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

    return {
        "public": observation_serializable,
        "private": {"instance_id": request.env_id},
    }
711
+
712
+
713
+ # Keep backward compatibility endpoints but mark as deprecated
714
@api_router.post("/{env_type}/create", deprecated=True)
async def create_env_legacy(
    env_type: str,
    config: Optional[Dict[str, Any]] = None,
    initial_state: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
    """[DEPRECATED] Use /env/{env_name}/initialize instead."""
    # Resolve the class before building the task, then construct the environment.
    env_cls = get_environment_cls(env_type)
    env = env_cls(create_task_instance_for_environment(env_type, initial_state, config))
    instance_id = str(uuid4())

    # Initialize the environment before storing (fixes Redis initialization bug)
    await env.initialize()
    await storage.store(instance_id, env)
    return {"instance_id": instance_id}
730
+
731
+
732
@api_router.post("/{env_type}/{instance_id}/reset", deprecated=True)
async def reset_env_legacy(
    env_type: str, instance_id: str, seed: Optional[int] = None
) -> Dict[str, Any]:
    """[DEPRECATED] Use /env/{env_name}/initialize instead."""
    # Re-runs env.initialize() on an existing instance and returns the fresh
    # observation under both "private" and "public". Responds 404 for an unknown id.
    # NOTE(review): `seed` is accepted but never forwarded to the environment.
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    obs = await env.initialize()

    # Persist the re-initialized environment, as step_env does after a step.
    # storage.get() falls back to the Redis copy when the id is not in memory,
    # so a copy that is never rewritten would bring back the pre-reset state.
    await storage.store(instance_id, env)

    obs_serializable = convert_numpy_types(obs)
    return {"private": obs_serializable, "public": obs_serializable}
743
+
744
+
745
@api_router.post("/{env_type}/{instance_id}/step", deprecated=True)
async def step_env_legacy(env_type: str, instance_id: str, calls: List[Any]) -> Dict[str, Any]:
    """[DEPRECATED] Use /env/{env_name}/step instead."""
    # Passes `calls` to env.step() unchanged and returns the resulting observation
    # under both "private" and "public". Responds 404 for an unknown id.
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    obs = await env.step(calls)

    # Persist the stepped environment, as step_env does. storage.get() falls back
    # to the Redis copy when the id is not in memory, so a copy that is never
    # rewritten would bring back the state from before this step.
    await storage.store(instance_id, env)

    obs_serializable = convert_numpy_types(obs)
    return {"private": obs_serializable, "public": obs_serializable}
754
+
755
+
756
@api_router.post("/{env_type}/{instance_id}/terminate", deprecated=True)
async def terminate_env_legacy(env_type: str, instance_id: str) -> Any:
    """[DEPRECATED] Use /env/{env_name}/terminate instead."""
    # Removes the instance from storage, then returns its final observation as-is
    # (no "public"/"private" wrapper). Responds 404 for an unknown id.
    env = await storage.remove(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")
    return convert_numpy_types(await env.terminate())
765
+
766
+
767
@api_router.get("/{env_type}/{instance_id}/checkpoint")
async def checkpoint_env(env_type: str, instance_id: str) -> Dict[str, Any]:
    """Get a checkpoint of the environment state."""
    # Looks the instance up without removing it; responds 404 for an unknown id.
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")
    # The snapshot is made JSON-safe before being wrapped in the response.
    return {"snapshot": convert_numpy_types(await env.checkpoint())}