synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,775 @@
|
|
1
|
+
from fastapi import APIRouter, HTTPException, Body
|
2
|
+
from uuid import uuid4
|
3
|
+
from typing import Dict, Any, List, Optional
|
4
|
+
from types import SimpleNamespace
|
5
|
+
from pydantic import BaseModel
|
6
|
+
import os
|
7
|
+
import json
|
8
|
+
import pickle
|
9
|
+
import base64
|
10
|
+
import numpy as np
|
11
|
+
import tempfile
|
12
|
+
from dataclasses import dataclass
|
13
|
+
|
14
|
+
from synth_ai.environments.service.registry import get_environment_cls, list_supported_env_types
|
15
|
+
from synth_ai.environments.stateful.core import StatefulEnvironment
|
16
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
17
|
+
|
18
|
+
# --- Optional Redis backing store ---------------------------------------------
# Default is *in-memory* only. Set SYNTH_USE_INMEM=0 to enable Redis if available.
#
# The toggle is evaluated *before* Redis is touched, so the default
# configuration neither imports the client library nor parses REDIS_URL only
# to throw the result away. The final values of REDIS_AVAILABLE and
# redis_client are the same as if the client had been created and then reset.
REDIS_AVAILABLE = False
redis_client = None

if os.getenv("SYNTH_USE_INMEM", "1") != "1":
    try:
        import redis.asyncio as aioredis
    except ImportError:
        # The redis package is not installed: stay on the in-memory store.
        pass
    else:
        # Create Redis client
        redis_client = aioredis.from_url(
            os.getenv("REDIS_URL", "redis://localhost:6379"),
            encoding="utf-8",
            decode_responses=False,  # We need binary mode for pickle
        )
        REDIS_AVAILABLE = True
# -----------------------------------------------------------------------------
|
39
|
+
|
40
|
+
api_router = APIRouter()

# Process-local store of environment instances, keyed by environment id.
# InstanceStorage.store() always writes here and InstanceStorage.get() reads
# here first; Redis, when enabled, only holds an additional serialized copy.
instances: Dict[str, StatefulEnvironment] = {}
|
44
|
+
|
45
|
+
|
46
|
+
# Environment-specific task instance creation
|
47
|
+
@dataclass
class MinimalTaskInstanceMetadata:
    """Field-less metadata placeholder for environments that expect a metadata object."""
|
52
|
+
|
53
|
+
|
54
|
+
@dataclass
class MinimalIntent:
    """Minimal intent for environments that need it.

    ``gold_state_diff`` and ``deterministic_eval_functions`` default to
    ``None`` in the signature and are replaced by a fresh empty container in
    ``__post_init__``, so instances never share a mutable default. Passing
    ``None`` explicitly is treated the same as omitting the argument.
    """

    rubric: Dict[str, Any]
    gold_trajectories: Optional[Any] = None
    # Annotated as Optional because ``None`` is the declared default; the
    # value is always a container once ``__post_init__`` has run.
    gold_state_diff: Optional[Dict[str, Any]] = None
    deterministic_eval_functions: Optional[List[Any]] = None

    def __post_init__(self) -> None:
        """Swap ``None`` placeholders for fresh empty containers."""
        if self.gold_state_diff is None:
            self.gold_state_diff = {}
        if self.deterministic_eval_functions is None:
            self.deterministic_eval_functions = []
|
68
|
+
|
69
|
+
|
70
|
+
@dataclass
class MinimalImpetus:
    """Bare-bones impetus that carries only the instruction text of a task."""

    instructions: str
|
75
|
+
|
76
|
+
|
77
|
+
def create_task_instance_for_environment(
    env_name: str,
    initial_state: Optional[Dict[str, Any]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Any:
    """Create appropriate task instance for different environments.

    Args:
        env_name: Environment name used to pick the task layout. "Sokoban",
            "CrafterClassic", "MiniGrid", "TicTacToe", "Verilog", "NetHack"
            and "Enron" get dedicated layouts; any other name gets a
            namespace holding only ``initial_engine_snapshot``.
        initial_state: Engine snapshot stored on the task as
            ``initial_engine_snapshot``. For "Enron" it is also read for the
            optional "question", "answer" and "emails" keys.
        config: Only consulted for "MiniGrid", where its optional "seed" and
            "env_name" entries are merged into the snapshot.

    Returns:
        A ``SimpleNamespace`` with the attributes set for that environment.
    """
    if env_name in ("Sokoban", "CrafterClassic", "MiniGrid", "TicTacToe"):
        # These environments work with SimpleNamespace
        snapshot = initial_state or {}

        # For MiniGrid, handle seed-based / named environment selection.
        if env_name == "MiniGrid" and config:
            overrides = {key: config[key] for key in ("seed", "env_name") if key in config}
            if overrides:
                # Merge into a new dict so the caller's initial_state is
                # never mutated by the overrides.
                snapshot = {**snapshot, **overrides}

        return SimpleNamespace(initial_engine_snapshot=snapshot)

    if env_name == "Verilog":
        # Verilog needs a snapshot_dir attribute.
        # NOTE(review): the directory created by mkdtemp() is not removed
        # anywhere in this function; cleanup appears to be left to whoever
        # consumes snapshot_dir - confirm.
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            snapshot_dir=tempfile.mkdtemp(prefix="verilog_task_"),
            metadata=MinimalTaskInstanceMetadata(),
            id=uuid4(),
        )

    if env_name == "NetHack":
        # NetHack needs proper TaskInstance structure with NetHackTaskInstanceMetadata
        from synth_ai.environments.examples.nethack.taskset import NetHackTaskInstanceMetadata

        metadata = NetHackTaskInstanceMetadata(
            character_role="tourist",  # Easy starting character
            starting_level=1,
            target_depth=3,
            time_limit=1000,
            difficulty="tutorial",
            special_objectives=["Explore at least 3 different dungeon levels"],
            seed=42,
        )
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            metadata=metadata,
            id=uuid4(),
            intent=MinimalIntent(rubric={"success": "reach target depth"}),
            impetus=MinimalImpetus(instructions="Play NetHack and achieve the highest score."),
            is_reproducible=False,
        )

    if env_name == "Enron":
        # Enron needs task instance with email data; for now, provide a
        # minimal structure. A missing or empty initial_state yields the
        # defaults for every Enron-specific field.
        state = initial_state or {}
        return SimpleNamespace(
            initial_engine_snapshot=initial_state,
            metadata=MinimalTaskInstanceMetadata(),
            id=uuid4(),
            question=state.get("question", "What information can you find?"),
            answer=state.get("answer", ""),
            emails=state.get("emails", []),
        )

    # Default: use SimpleNamespace for unknown environments
    return SimpleNamespace(initial_engine_snapshot=initial_state)
|
155
|
+
|
156
|
+
|
157
|
+
def _new_task_from_serialized(serialized_data: Dict[str, Any]) -> SimpleNamespace:
    """Start a task namespace with the ``id`` and ``initial_engine_snapshot`` fields."""
    from uuid import UUID  # only uuid4 is imported at module level

    task = SimpleNamespace()
    raw_id = serialized_data.get("id")
    # A missing or null id gets a fresh UUID; str() lets an already-parsed
    # UUID object through as well as its string form.
    task.id = uuid4() if raw_id is None else UUID(str(raw_id))
    task.initial_engine_snapshot = serialized_data.get("initial_engine_snapshot", {})
    return task


def _attach_generic_impetus_and_intent(task: SimpleNamespace, serialized_data: Dict[str, Any]) -> None:
    """Copy the optional impetus/intent sections and the reproducibility flag onto ``task``."""
    impetus_data = serialized_data.get("impetus", {})
    if impetus_data:
        task.impetus = SimpleNamespace(instructions=impetus_data.get("instructions", ""))

    intent_data = serialized_data.get("intent", {})
    if intent_data:
        task.intent = SimpleNamespace(
            rubric=intent_data.get("rubric", ""),
            gold_trajectories=intent_data.get("gold_trajectories", []),
            gold_state_diff=intent_data.get("gold_state_diff", {}),
        )

    task.is_reproducible = serialized_data.get("is_reproducible", True)


def _reconstruct_nethack_task(serialized_data: Dict[str, Any]) -> SimpleNamespace:
    """Rebuild a NetHack task, which always carries metadata, impetus and intent."""
    from synth_ai.environments.examples.nethack.taskset import NetHackTaskInstanceMetadata

    default_instructions = "Play NetHack and achieve the highest score."
    default_rubric = {"success": "reach target depth"}

    # Extract metadata from serialized data
    metadata_data = serialized_data.get("metadata", {})
    metadata = NetHackTaskInstanceMetadata(
        character_role=metadata_data.get("character_role", "tourist"),
        starting_level=metadata_data.get("starting_level", 1),
        target_depth=metadata_data.get("target_depth", 3),
        time_limit=metadata_data.get("time_limit", 1000),
        difficulty=metadata_data.get("difficulty", "tutorial"),
        special_objectives=metadata_data.get(
            "special_objectives", ["Explore at least 3 different dungeon levels"]
        ),
        seed=metadata_data.get("seed", 42),
    )

    task = _new_task_from_serialized(serialized_data)
    task.metadata = metadata

    # An absent or empty impetus section falls back to the default text.
    impetus_data = serialized_data.get("impetus", {}) or {}
    task.impetus = MinimalImpetus(
        instructions=impetus_data.get("instructions", default_instructions)
    )

    intent_data = serialized_data.get("intent", {})
    if intent_data:
        task.intent = MinimalIntent(
            rubric=intent_data.get("rubric", default_rubric),
            gold_trajectories=intent_data.get("gold_trajectories", []),
            gold_state_diff=intent_data.get("gold_state_diff", {}),
        )
    else:
        # Keep the dataclass defaults (gold_trajectories stays None here).
        task.intent = MinimalIntent(rubric=default_rubric)

    task.is_reproducible = serialized_data.get("is_reproducible", False)
    return task


async def reconstruct_task_instance_from_serialized(
    env_name: str, serialized_data: Dict[str, Any]
) -> Any:
    """Reconstruct a task instance from serialized data for specific environment types.

    Args:
        env_name: Environment name selecting the reconstruction strategy.
        serialized_data: Mapping produced by serializing a task instance.

    Returns:
        For "MiniGrid" and "Sokoban", whatever their task-instance class's
        ``deserialize`` returns. Otherwise a ``SimpleNamespace`` with ``id``,
        ``initial_engine_snapshot`` and ``is_reproducible``, plus
        ``impetus``/``intent`` and environment-specific attributes where
        applicable.

    Raises:
        ValueError: If ``serialized_data["id"]`` is present, not ``None``
            and not a valid UUID string.
    """
    if env_name == "MiniGrid":
        # MiniGrid has its own TaskInstance class with deserialize method
        from synth_ai.environments.examples.minigrid.taskset import MiniGridTaskInstance

        return await MiniGridTaskInstance.deserialize(serialized_data)

    if env_name == "Sokoban":
        # Sokoban has its own TaskInstance class with deserialize method
        from synth_ai.environments.examples.sokoban.taskset import SokobanTaskInstance

        return await SokobanTaskInstance.deserialize(serialized_data)

    if env_name == "NetHack":
        return _reconstruct_nethack_task(serialized_data)

    task = _new_task_from_serialized(serialized_data)

    if env_name in ("CrafterClassic", "TicTacToe"):
        # Metadata is exposed as attributes of a plain namespace.
        task.metadata = SimpleNamespace(**serialized_data.get("metadata", {}))
    elif env_name == "Verilog":
        # Verilog needs special handling with snapshot_dir
        task.metadata = MinimalTaskInstanceMetadata()
        task.snapshot_dir = tempfile.mkdtemp(prefix="verilog_task_")
    elif env_name == "Enron":
        task.metadata = MinimalTaskInstanceMetadata()
        # Enron-specific fields
        task.question = serialized_data.get("question", "What information can you find?")
        task.answer = serialized_data.get("answer", "")
        task.emails = serialized_data.get("emails", [])
    # Unknown environments get no metadata attribute at all.

    _attach_generic_impetus_and_intent(task, serialized_data)
    return task
|
344
|
+
|
345
|
+
|
346
|
+
# Storage abstraction
|
347
|
+
class InstanceStorage:
    """Keeps environment instances in the module-level ``instances`` dict,
    mirroring them to Redis (pickled + base64) when a client is available."""

    async def store(self, env_id: str, env: StatefulEnvironment):
        """Save ``env`` under ``env_id`` in memory and, best effort, in Redis."""
        # The in-memory copy is written unconditionally; Redis is optional.
        instances[env_id] = env

        if not (REDIS_AVAILABLE and redis_client):
            print(f"✅ Stored environment {env_id} in-memory (Redis not available)")
            return

        try:
            # pickle -> base64 -> utf-8 text so the value can be stored as a string
            pickled = pickle.dumps(env)
            payload = base64.b64encode(pickled).decode("utf-8")
            # Entry expires after 3600 seconds (one hour)
            await redis_client.set(f"env_instance:{env_id}", payload, ex=3600)
            print(f"✅ Stored environment {env_id} in Redis + in-memory")
        except Exception as exc:
            # A Redis failure is only reported; the in-memory copy already exists.
            print(f"⚠️ Redis storage failed, using in-memory fallback: {exc}")

    async def get(self, env_id: str) -> Optional[StatefulEnvironment]:
        """Return the environment for ``env_id`` or ``None`` when it is unknown.

        The in-memory dict is consulted first; on a miss the Redis copy (if
        any) is unpickled and cached back into memory.
        """
        if env_id in instances:
            print(f"✅ Retrieved environment {env_id} from in-memory store")
            return instances[env_id]

        if REDIS_AVAILABLE and redis_client:
            try:
                payload = await redis_client.get(f"env_instance:{env_id}")
                if payload:
                    # NOTE(review): pickle.loads executes arbitrary code from the
                    # payload; this is only safe if the Redis instance is trusted.
                    restored = pickle.loads(base64.b64decode(payload))
                    print(f"✅ Retrieved environment {env_id} from Redis (restored to memory)")
                    # Cache locally so the next lookup skips Redis
                    instances[env_id] = restored
                    return restored
            except Exception as exc:
                print(f"⚠️ Redis retrieval failed: {exc}")

        print(f"❌ Environment {env_id} not found in either store")
        return None

    async def remove(self, env_id: str) -> Optional[StatefulEnvironment]:
        """Delete ``env_id`` from memory and Redis and return the removed instance.

        Returns ``None`` when neither store had an entry.
        """
        # Look the instance up before deleting so it can be handed back.
        found = await self.get(env_id)
        popped = instances.pop(env_id, None)

        if not (REDIS_AVAILABLE and redis_client):
            print(f"✅ Removed environment {env_id} from in-memory")
        else:
            try:
                await redis_client.delete(f"env_instance:{env_id}")
                print(f"✅ Removed environment {env_id} from both Redis and in-memory")
            except Exception as exc:
                print(f"⚠️ Redis removal failed, removed from in-memory: {exc}")

        return found if found else popped
|
410
|
+
|
411
|
+
|
412
|
+
# Global storage instance
|
413
|
+
# Single module-level InstanceStorage used by the endpoint handlers in this file.
storage = InstanceStorage()
|
414
|
+
|
415
|
+
|
416
|
+
def convert_numpy_types(obj):
    """Recursively convert numpy values to native Python types for JSON serialization.

    Handles dicts, lists, tuples, numpy scalars (integer, floating, bool),
    numpy arrays, dataclass instances and plain objects exposing ``__dict__``.
    Any other value (including classes) is returned unchanged.

    Args:
        obj: Arbitrary, possibly nested, value that may contain numpy types.

    Returns:
        An equivalent structure in which numpy scalars/arrays are replaced by
        ``int``/``float``/``bool``/``list``. Dataclass instances and simple
        objects become dicts of their converted attributes; objects with 50 or
        more attributes, or whose conversion raises ``RecursionError`` /
        ``AttributeError``, become ``str(obj)``.
    """
    import numpy as np
    from dataclasses import fields, is_dataclass

    def _native_key(key):
        # json.dumps rejects numpy scalar keys, so unwrap them to Python scalars.
        if isinstance(key, (np.integer, np.floating, np.bool_)):
            return key.item()
        return key

    def _convert_attrs(attrs):
        # ndarray values are handled by the recursive call (-> tolist()).
        return {name: convert_numpy_types(value) for name, value in attrs.items()}

    if isinstance(obj, dict):
        return {_native_key(key): convert_numpy_types(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_numpy_types(item) for item in obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)

    # is_dataclass() is also True for dataclass *classes*; only instances are
    # converted, classes fall through and are returned unchanged.
    if is_dataclass(obj) and not isinstance(obj, type):
        if hasattr(obj, "to_dict"):
            # The custom to_dict() output may still contain numpy values.
            return convert_numpy_types(obj.to_dict())
        if hasattr(obj, "__dict__"):
            return _convert_attrs(obj.__dict__)
        # Slotted dataclasses have no __dict__; read the declared fields instead.
        return _convert_attrs({f.name: getattr(obj, f.name) for f in fields(obj)})

    if hasattr(obj, "__dict__") and not isinstance(obj, type):
        # Generic objects: only expand small ones, otherwise fall back to str().
        try:
            if len(obj.__dict__) < 50:
                return _convert_attrs(obj.__dict__)
            return str(obj)
        except (RecursionError, AttributeError):
            return str(obj)

    return obj
|
466
|
+
|
467
|
+
|
468
|
+
# Request/Response models for better API documentation
|
469
|
+
# Request body for POST /env/{env_name}/initialize.
# When task_instance is provided, initialize_env reconstructs the task from it;
# otherwise a new task instance is created from initial_state and config.
class InitializeRequest(BaseModel):
    # Optional starting state passed to create_task_instance_for_environment
    initial_state: Optional[Dict[str, Any]] = None
    # Optional environment configuration passed alongside initial_state
    config: Optional[Dict[str, Any]] = None
    # Optional serialized task instance; takes precedence over the two fields above
    task_instance: Optional[Dict[str, Any]] = None  # Add task_instance field
|
473
|
+
|
474
|
+
|
475
|
+
# Request body for POST /env/{env_name}/step.
class StepRequest(BaseModel):
    # Identifier of the stored environment instance to step
    env_id: str
    # Optional tag used in step_env log lines; one is generated when omitted
    request_id: Optional[str] = None
    # Action payload; step_env reads its "tool_calls" list
    action: Dict[str, Any]
|
479
|
+
|
480
|
+
|
481
|
+
# Request body for POST /env/{env_name}/terminate.
class TerminateRequest(BaseModel):
    # Identifier of the stored environment instance to remove and terminate
    env_id: str
|
483
|
+
|
484
|
+
|
485
|
+
@api_router.get("/health")
async def get_health():
    """Report service liveness together with the supported environment types."""
    supported = list_supported_env_types()
    return {"status": "ok", "supported_environments": supported}
|
488
|
+
|
489
|
+
|
490
|
+
@api_router.post("/env/{env_name}/initialize")
async def initialize_env(env_name: str, request: InitializeRequest = Body(...)) -> Dict[str, Any]:
    """Create, initialize and store a new environment of type ``env_name``.

    Returns a dict with the generated ``env_id``, the first observation
    (numpy values converted to native types), ``done=False`` and an empty
    ``info``. Any exception raised along the way is logged with its traceback
    and re-raised as an HTTP 400.
    """
    import traceback

    try:
        print(f"🔍 Initializing {env_name} environment...")

        env_cls = get_environment_cls(env_name)
        print(f"✅ Got environment class: {env_cls}")

        # A caller-supplied task instance wins; otherwise build a fresh one.
        if not request.task_instance:
            print("🔍 Creating new task instance...")
            task = create_task_instance_for_environment(
                env_name, request.initial_state, request.config
            )
            print(f"✅ Created task instance: {type(task)}")
        else:
            print("🔍 Using provided task_instance...")
            task = await reconstruct_task_instance_from_serialized(env_name, request.task_instance)
            print(f"✅ Reconstructed task instance: {type(task)}")

        # Original author's note: recursion might happen here for Sokoban.
        print("🔍 Creating environment instance...")
        env = env_cls(task)
        print("✅ Created environment instance")

        env_id = str(uuid4())
        print(f"✅ Generated env_id: {env_id}")

        # Original author's note: initialize() might also cause recursion.
        print("🔍 Calling env.initialize()...")
        first_obs = await env.initialize()
        print(f"✅ Environment initialized, observation type: {type(first_obs)}")

        # Only a fully initialized environment is persisted.
        print("🔍 Storing environment...")
        await storage.store(env_id, env)
        print("✅ Environment stored")

        print("🔍 Converting numpy types...")
        payload = convert_numpy_types(first_obs)
        print("✅ Numpy types converted")

        return {"env_id": env_id, "observation": payload, "done": False, "info": {}}

    except Exception as exc:
        # Recursion errors get their own log headline and detail text;
        # everything else is reported generically. Both map to HTTP 400.
        if isinstance(exc, RecursionError):
            headline = f"❌ RECURSION ERROR in {env_name} initialization:"
            detail = f"Recursion error during {env_name} initialization: {str(exc)}"
        else:
            headline = f"❌ ERROR in {env_name} initialization:"
            detail = f"Error during {env_name} initialization: {str(exc)}"
        trace_text = traceback.format_exc()
        print(headline)
        print(trace_text)
        raise HTTPException(status_code=400, detail=detail)
|
557
|
+
|
558
|
+
|
559
|
+
@api_router.post("/env/{env_name}/step")
async def step_env(env_name: str, request: StepRequest = Body(...)) -> Dict[str, Any]:
    """Run one step on a stored environment and return the formatted result.

    The response contains ``observation`` (the raw step result), ``reward``
    (``reward_last`` from the result, if present), ``done`` and an ``info``
    dict with the ``terminated``/``truncated`` flags. Raises HTTP 404 when the
    environment id is unknown and HTTP 400 when anything fails during the step.
    All diagnostics are written to stderr.
    """
    import sys
    import traceback
    import uuid as uuid_module

    def _log(line: str) -> None:
        # Every diagnostic line of this endpoint goes to stderr.
        print(line, file=sys.stderr)

    # Reuse the caller's request id, or make a short random one for log correlation.
    request_id = request.request_id or str(uuid_module.uuid4())[:8]
    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: request_id = {request_id}")
    _log(f"\n🌐 ENVIRONMENTS SERVICE {request_id}: step_env HTTP endpoint called")
    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: env_name = {env_name}")
    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: env_id = {request.env_id}")
    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: action = {request.action}")

    # Dump the innermost frames to show where this request is being handled from.
    frames = traceback.format_stack()
    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Call stack (last 3 frames):")
    for entry in frames[-3:]:
        _log(f" {entry.strip()}")

    _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to retrieve environment from storage")
    env = await storage.get(request.env_id)
    if not env:
        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Environment not found!")
        raise HTTPException(
            status_code=404, detail=f"Environment instance {request.env_id} not found"
        )

    try:
        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to extract tool calls from action")
        raw_tool_calls = request.action.get("tool_calls", [])
        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Extracted raw_tool_calls = {raw_tool_calls}")

        # Dict entries are wrapped in EnvToolCall; anything else is passed through as-is.
        tool_calls = [
            EnvToolCall(tool=item.get("tool", ""), args=item.get("args", {}))
            if isinstance(item, dict)
            else item
            for item in raw_tool_calls
        ]
        _log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Converted to EnvToolCall objects: {tool_calls}"
        )

        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to call env.step()")
        result = await env.step(tool_calls)
        _log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: env.step() completed, result type = {type(result)}"
        )

        # Persist the post-step state under the same id.
        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: About to store environment back to storage")
        await storage.store(request.env_id, env)
        _log(f"🌐 ENVIRONMENTS SERVICE {request_id}: Environment stored successfully")

        # env.step() hands back the observation dict itself, so reward and the
        # termination flags are read out of that observation.
        reward = result.get("reward_last")
        terminated = result.get("terminated", False)
        truncated = result.get("truncated", False)
        response = {
            "observation": result,
            "reward": reward,
            "done": terminated or truncated,
            "info": {"terminated": terminated, "truncated": truncated},
        }

        response_serializable = convert_numpy_types(response)

        _log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Returning response with keys: {list(response_serializable.keys())}"
        )
        return response_serializable
    except Exception as exc:
        _log(
            f"🌐 ENVIRONMENTS SERVICE {request_id}: Exception during step: {type(exc).__name__} - {exc}"
        )
        raise HTTPException(status_code=400, detail=str(exc))
|
689
|
+
|
690
|
+
|
691
|
+
@api_router.post("/env/{env_name}/terminate")
async def terminate_env(env_name: str, request: TerminateRequest = Body(...)) -> Dict[str, Any]:
    """Remove an environment from storage, terminate it and return its last observation.

    Raises HTTP 404 when the id is unknown and HTTP 400 when termination or
    result conversion fails.
    """
    target_id = request.env_id
    env = await storage.remove(target_id)
    if not env:
        raise HTTPException(
            status_code=404, detail=f"Environment instance {target_id} not found"
        )

    try:
        # terminate() yields a final observation that is returned as "public".
        final_obs = convert_numpy_types(await env.terminate())
        return {"public": final_obs, "private": {"instance_id": request.env_id}}
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
|
711
|
+
|
712
|
+
|
713
|
+
# Keep backward compatibility endpoints but mark as deprecated
|
714
|
+
@api_router.post("/{env_type}/create", deprecated=True)
async def create_env_legacy(
    env_type: str,
    config: Optional[Dict[str, Any]] = None,
    initial_state: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
    """[DEPRECATED] Use /env/{env_name}/initialize instead."""
    env_cls = get_environment_cls(env_type)
    env = env_cls(create_task_instance_for_environment(env_type, initial_state, config))
    new_id = str(uuid4())

    # The environment is initialized first so that the stored copy is ready to use.
    await env.initialize()
    await storage.store(new_id, env)
    return {"instance_id": new_id}
|
730
|
+
|
731
|
+
|
732
|
+
@api_router.post("/{env_type}/{instance_id}/reset", deprecated=True)
async def reset_env_legacy(
    env_type: str, instance_id: str, seed: Optional[int] = None
) -> Dict[str, Any]:
    """[DEPRECATED] Use /env/{env_name}/initialize instead."""
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    # Re-running initialize() produces the observation returned for the reset;
    # the same converted payload is exposed under both keys.
    payload = convert_numpy_types(await env.initialize())
    return {"private": payload, "public": payload}
|
743
|
+
|
744
|
+
|
745
|
+
@api_router.post("/{env_type}/{instance_id}/step", deprecated=True)
async def step_env_legacy(env_type: str, instance_id: str, calls: List[Any]) -> Dict[str, Any]:
    """[DEPRECATED] Use /env/{env_name}/step instead."""
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    # The calls list is forwarded to env.step() unchanged; the converted
    # observation is exposed under both keys.
    payload = convert_numpy_types(await env.step(calls))
    return {"private": payload, "public": payload}
|
754
|
+
|
755
|
+
|
756
|
+
@api_router.post("/{env_type}/{instance_id}/terminate", deprecated=True)
async def terminate_env_legacy(env_type: str, instance_id: str) -> Any:
    """[DEPRECATED] Use /env/{env_name}/terminate instead."""
    env = await storage.remove(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    # The environment is already out of storage at this point; return its
    # final observation with numpy values converted.
    final_obs = await env.terminate()
    return convert_numpy_types(final_obs)
|
765
|
+
|
766
|
+
|
767
|
+
@api_router.get("/{env_type}/{instance_id}/checkpoint")
async def checkpoint_env(env_type: str, instance_id: str) -> Dict[str, Any]:
    """Return a JSON-friendly snapshot of a stored environment's state.

    Raises HTTP 404 when no instance is stored under ``instance_id``.
    """
    env = await storage.get(instance_id)
    if not env:
        raise HTTPException(status_code=404, detail="Instance not found")

    raw_snapshot = await env.checkpoint()
    return {"snapshot": convert_numpy_types(raw_snapshot)}
|