synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
# environment.py
|
2
|
+
from __future__ import annotations
|
3
|
+
from typing import List, Optional, Dict, Any, Union
|
4
|
+
from pydantic import BaseModel, Field
|
5
|
+
|
6
|
+
from synth_ai.environments.environment.tools import (
|
7
|
+
EnvToolCall,
|
8
|
+
ToolResult,
|
9
|
+
TOOL_REGISTRY,
|
10
|
+
register_tool,
|
11
|
+
)
|
12
|
+
from synth_ai.environments.environment.shared_engine import (
|
13
|
+
GetObservationCallable,
|
14
|
+
InternalObservation,
|
15
|
+
)
|
16
|
+
from synth_ai.environments.stateful.core import StatefulEnvironment
|
17
|
+
from synth_ai.environments.examples.enron.engine import (
|
18
|
+
EnronEngine,
|
19
|
+
ACTION_SEARCH,
|
20
|
+
ACTION_READ,
|
21
|
+
ACTION_ANSWER,
|
22
|
+
)
|
23
|
+
from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
|
24
|
+
|
25
|
+
|
26
|
+
# -------- pydantic schemas (used by agent / LLM function calls)
|
27
|
+
class SearchEmailsArgs(BaseModel):
    # Argument schema for an e-mail search tool call. Documented with comments
    # rather than a docstring so the generated model schema stays unchanged.

    # Required: address of the mailbox performing the search.
    inbox: str = Field(
        ...,
        description="Email address performing the search (used by tool logic)",
    )
    # Required: keywords, combined with AND per the field description.
    keywords: List[str] = Field(
        ...,
        description="Keywords to AND-search for",
    )
    # Optional filters; each defaults to None.
    from_addr: Optional[str] = None
    to_addr: Optional[str] = None
    sent_after: Optional[str] = None
    sent_before: Optional[str] = None
    # Defaults to 10 and is validated to be at most 10.
    max_results: int = Field(default=10, le=10)
|
35
|
+
|
36
|
+
|
37
|
+
class ReadEmailArgs(BaseModel):
    # Argument schema for a read-email tool call (comments instead of a
    # docstring so the generated model schema stays unchanged).

    # Required: identifier of the message to read.
    message_id: str = Field(...)
|
39
|
+
|
40
|
+
|
41
|
+
class AnswerQuestionArgs(BaseModel):
    # Argument schema for an answer-question tool call (comments instead of a
    # docstring so the generated model schema stays unchanged).

    # Required: the answer text being submitted.
    answer: str = Field(...)
|
43
|
+
|
44
|
+
|
45
|
+
# --------------------------------------------------------------------------- tool wrappers
|
46
|
+
class SearchEmails(EnvToolCall):
    # Tool call that pairs ACTION_SEARCH with the caller's keyword arguments.

    def __init__(self, **kwargs):
        # The keyword arguments are stored untouched as the action payload.
        search_args = kwargs
        self.action = (ACTION_SEARCH, search_args)
|
49
|
+
|
50
|
+
|
51
|
+
class ReadEmail(EnvToolCall):
    # Tool call that pairs ACTION_READ with the id of the message to read.

    def __init__(self, message_id: str):
        read_action = (ACTION_READ, message_id)
        self.action = read_action
|
54
|
+
|
55
|
+
|
56
|
+
class AnswerQuestion(EnvToolCall):
    # Tool call that pairs ACTION_ANSWER with the submitted answer text.

    def __init__(self, answer: str):
        answer_action = (ACTION_ANSWER, answer)
        self.action = answer_action
|
59
|
+
|
60
|
+
|
61
|
+
# -- terminate wrapper (maps to an empty-answer ACTION_ANSWER) --------------
|
62
|
+
class Terminate(EnvToolCall):
    # Terminate request, expressed as ACTION_ANSWER with an empty answer string.

    def __init__(self):
        empty_answer = ""
        self.action = (ACTION_ANSWER, empty_answer)
|
65
|
+
|
66
|
+
|
67
|
+
# -------- observation callable (optional for formatted observations)
|
68
|
+
class SynthEnronObservationCallable(GetObservationCallable):
    async def get_observation(
        self, pub: Dict[str, Any], priv: Dict[str, Any]
    ) -> InternalObservation:
        """Render the public and private state dicts as a multi-line summary string."""
        question = pub.get("question")
        last_reward = priv.get("reward_last")
        # One entry per output line; joined with newlines below.
        summary_lines = (
            f"Q: {question}",
            f"Tools: {pub.get('tools')}",
            f"Answered: {pub.get('already_answered')}",
            f"Search Res: {len(pub.get('search_results', []))} items",
            f"Email Loaded: {pub.get('email') is not None}",
            f"Tool Error: {pub.get('tool_error')}",
            f"Reward Δ: {last_reward}",
        )
        return "\n".join(summary_lines)
|
76
|
+
|
77
|
+
|
78
|
+
# --------------------------------------------------------------------------- environment
|
79
|
+
class EnronEnvironment(StatefulEnvironment):
    """Environment that drives an ``EnronEngine`` through named tool calls.

    Each ``step`` executes one tool call, forwards the tool's payload to the
    engine, and returns an observation produced by ``custom_obs``.
    """

    def __init__(
        self,
        task_instance: EnronTaskInstance,
        custom_obs: Optional[GetObservationCallable] = None,
    ):
        """Create the engine, the observation formatter and the tool instances.

        Args:
            task_instance: Task instance handed to ``EnronEngine``.
            custom_obs: Observation formatter; a ``SynthEnronObservationCallable``
                is used when omitted.
        """
        self.engine = EnronEngine(task_instance)
        self.custom_obs = custom_obs or SynthEnronObservationCallable()
        self.name = "Enron-QA-Env"

        # Store tool instances on self for reliable access
        # NOTE(review): SearchEmailsTool, ReadEmailTool, AnswerQuestionTool and
        # TerminateTool are neither defined nor imported in the visible part of
        # this module -- confirm they are in scope, otherwise this raises
        # NameError on construction.
        self._tools_instances = {
            "search_emails": SearchEmailsTool(self.engine),
            "read_email": ReadEmailTool(self.engine),
            "answer_question": AnswerQuestionTool(self.engine),
            "terminate": TerminateTool(self.engine),
        }
        for tool_name, tool_instance in self._tools_instances.items():
            registered = TOOL_REGISTRY.get(tool_name)
            # (Re-)register when the name is unknown or bound to another engine.
            # getattr() keeps a registered tool without an ``engine`` attribute
            # from raising AttributeError here.
            if registered is None or getattr(registered, "engine", None) is not self.engine:
                register_tool(tool_instance)

    async def initialize(self) -> InternalObservation:
        """Reset the engine and return the first observation."""
        priv, pub = await self.engine._reset_engine()
        return await self._obs(priv, pub)

    async def step(
        self,
        calls: Union[EnvToolCall, List[EnvToolCall], List[List[EnvToolCall]]],
    ) -> InternalObservation:
        """Execute one tool call and advance the engine.

        Only the first call of the first group is executed; any further calls
        in ``calls`` are ignored.

        Args:
            calls: A single call, a list of calls, or a list of call lists.

        Returns:
            The observation built from the engine state after the step.

        Raises:
            ValueError: If ``calls`` contains no call, or the tool is unknown.
            TypeError: If the selected call is not an ``EnvToolCall``.
        """
        # normalise → always [[EnvToolCall]]
        if isinstance(calls, EnvToolCall):
            calls = [[calls]]
        elif calls and isinstance(calls[0], EnvToolCall):
            calls = [calls]

        # Fail with a clear message instead of an opaque IndexError.
        if not calls or not calls[0]:
            raise ValueError("step() requires at least one EnvToolCall.")

        first_call = calls[0][0]
        if not isinstance(first_call, EnvToolCall):
            raise TypeError(f"Processed call is not EnvToolCall: {type(first_call)}")

        tool_name = first_call.tool
        # Prefer this environment's own tool instances over the shared registry.
        tool_to_execute = self._tools_instances.get(tool_name)

        if not tool_to_execute:
            tool_to_execute = TOOL_REGISTRY.get(tool_name)
            if not tool_to_execute:
                raise ValueError(f"Tool '{tool_name}' not found.")

        tool_result: ToolResult = await tool_to_execute(first_call)

        public_payload_for_engine = (
            tool_result.payload if tool_result.ok and tool_result.payload else {}
        )
        if not tool_result.ok:
            # The payload is a fresh dict on this path, so adding a key is safe.
            public_payload_for_engine["tool_error"] = tool_result.error

        priv, pub = await self.engine._step_engine(public_payload_for_engine)
        return await self._obs(priv, pub)

    async def terminate(self) -> InternalObservation:
        """Close the engine's database and return a final observation."""
        self.engine.close_db()
        priv_state_on_terminate = {
            "reward_last": 0,
            "total_reward": self.engine.total_reward,
            "terminated": True,
            "truncated": False,
            "gold_answer": self.engine._sample()["answer"],
        }
        pub_state_on_terminate = {
            "question": self.engine._sample()["question"],
            "tools": [],
            "already_answered": self.engine.answered,
            "status": "terminated_by_env",
        }
        return await self._obs(priv_state_on_terminate, pub_state_on_terminate)

    async def checkpoint(self) -> InternalObservation:
        """Serialize the engine and return the snapshot with a status message."""
        snapshot = await self.engine._serialize_engine()
        return {
            "engine_snapshot": snapshot.model_dump(),
            "message": "Checkpoint created",
        }

    async def _obs(self, priv: Dict[str, Any], pub: Dict[str, Any]):
        """Format state via ``custom_obs``; fall back to the merged dicts."""
        if self.custom_obs:
            # Note the argument order: the formatter takes (pub, priv).
            return await self.custom_obs.get_observation(pub, priv)
        return {**pub, **priv}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# taskset.py
|
2
|
+
from __future__ import annotations
|
3
|
+
import asyncio
|
4
|
+
from uuid import uuid4
|
5
|
+
import os
|
6
|
+
|
7
|
+
from datasets import load_dataset
|
8
|
+
from dataclasses import dataclass, asdict
|
9
|
+
|
10
|
+
from synth_ai.environments.tasks.core import (
|
11
|
+
Task,
|
12
|
+
TaskInstance,
|
13
|
+
TaskInstanceSet,
|
14
|
+
TaskInstanceMetadata,
|
15
|
+
SplitInfo,
|
16
|
+
Impetus,
|
17
|
+
Intent,
|
18
|
+
)
|
19
|
+
|
20
|
+
# Task description passed to ``Task``: premises, objectives, no constraints and
# no shared environment parameters.
enron_task = Task(
    shared_env_params={},
    global_objectives="Provide the correct answer; minimise queries",
    global_constraints="",
    global_premises="Answer factual questions by reading Enron e-mails",
)
|
26
|
+
|
27
|
+
|
28
|
+
# --------------------------------------------------------------------------- metadata
|
29
|
+
@dataclass
class EnronTaskInstanceMetadata(TaskInstanceMetadata):
    """Metadata attached to each Enron-QA task instance."""

    # Split label supplied by the task-set builder ("train" or "test").
    split: str
    # Number of entries in ``message_ids``.
    email_count: int
    # Message ids taken from the dataset row.
    message_ids: list[str]
|
34
|
+
|
35
|
+
|
36
|
+
@dataclass
class EnronTaskInstance(TaskInstance):
    """Enron-QA task instance with dict-based (de)serialisation."""

    async def serialize(self):
        """Return the instance as a plain ``dict`` with a string ``id``.

        Nested dataclasses are converted to dicts by ``dataclasses.asdict``.
        """
        # Local import: the module itself only imports ``uuid4``.
        from uuid import UUID

        data = asdict(self)
        # Compare against the UUID type directly instead of generating a
        # throw-away random UUID just to obtain its class.
        if isinstance(data.get("id"), UUID):
            data["id"] = str(data["id"])
        return data

    @classmethod
    async def deserialize(cls, data: dict) -> "EnronTaskInstance":
        """Rebuild an instance from ``serialize`` output.

        Reverses what ``serialize`` flattens: a string ``id`` becomes a
        ``UUID`` again and dict-valued ``impetus`` / ``intent`` / ``metadata``
        are turned back into their dataclasses. Values that already have the
        right type are passed through unchanged, and ``data`` is not mutated.
        """
        from uuid import UUID

        fields = dict(data)  # shallow copy so the caller's dict is untouched

        raw_id = fields.get("id")
        if isinstance(raw_id, str):
            try:
                fields["id"] = UUID(raw_id)
            except ValueError:
                # Not a UUID string: keep the id exactly as supplied.
                pass

        # NOTE(review): assumes each dataclass accepts all of its asdict()
        # keys as constructor arguments -- confirm against tasks.core.
        rebuilders = (
            ("impetus", Impetus),
            ("intent", Intent),
            ("metadata", EnronTaskInstanceMetadata),
        )
        for key, factory in rebuilders:
            value = fields.get(key)
            if isinstance(value, dict):
                fields[key] = factory(**value)

        return cls(**fields)
|
47
|
+
|
48
|
+
|
49
|
+
# --------------------------------------------------------------------------- task-set builder
|
50
|
+
# Use a local dataset cache under examples/enron/dataset
|
51
|
+
# The cache directory sits beside this module and is created at import time.
CACHE_DIR = os.path.join(
    os.path.split(__file__)[0],
    "dataset",
)
os.makedirs(CACHE_DIR, exist_ok=True)
|
53
|
+
|
54
|
+
|
55
|
+
async def create_enron_taskset() -> TaskInstanceSet:
    """Build the Enron-QA task set from the dataset's train and test splits.

    Returns:
        A ``TaskInstanceSet`` holding the train instances followed by the test
        instances; the test instance ids are recorded in its ``SplitInfo``.
    """
    dataset_name = "corbt/enron_emails_sample_questions"
    # Load train first, then test.
    loaded = {
        split_name: load_dataset(
            dataset_name,
            split=split_name,
            cache_dir=CACHE_DIR,
        )
        for split_name in ("train", "test")
    }

    def to_instance(row: dict, split: str) -> EnronTaskInstance:
        # The question is the instruction; the answer is the gold state diff.
        row_impetus = Impetus(instructions=row["question"])
        row_intent = Intent(
            rubric={"goal": "Answer the question using the Enron emails."},
            gold_trajectories=None,
            gold_state_diff={"answer": row["answer"]},
        )
        row_metadata = EnronTaskInstanceMetadata(
            split=split,
            email_count=len(row["message_ids"]),
            message_ids=row["message_ids"],
        )
        return EnronTaskInstance(
            id=uuid4(),
            impetus=row_impetus,
            intent=row_intent,
            metadata=row_metadata,
            is_reproducible=True,
            initial_engine_snapshot=row,
        )

    train_instances = [to_instance(row, "train") for row in loaded["train"]]
    test_instances = [to_instance(row, "test") for row in loaded["test"]]

    # No validation split is defined; only the test ids are recorded.
    test_ids = {instance.id for instance in test_instances}
    split_info = SplitInfo(
        val_instance_ids=set(),
        test_instance_ids=test_ids,
        _is_split_defined=True,
    )

    all_instances = train_instances + test_instances
    return TaskInstanceSet(
        name="Enron-QA",
        description="QA over Enron email dataset sample.",
        instances=all_instances,
        split_info=split_info,
    )
|
103
|
+
|
104
|
+
|
105
|
+
# quick sanity check ----------------------------------------------------------
|
106
|
+
if __name__ == "__main__":

    async def _sanity_check():
        # Build the task set once and report how many instances it contains.
        built = await create_enron_taskset()
        print(f"{len(built.instances)} instances built.")

    asyncio.run(_sanity_check())
|
@@ -0,0 +1,111 @@
|
|
1
|
+
"""
|
2
|
+
Script: keyword_stats.py
|
3
|
+
Purpose: Iterate over a sample of Enron-QA tasks and compare the hit-rate of the
|
4
|
+
full keyword list extracted from the natural-language question with the hit-rate
|
5
|
+
when the *final* keyword is dropped (the heuristic your current agent uses).
|
6
|
+
|
7
|
+
It logs the result counts side-by-side so you can see whether the heuristic is
|
8
|
+
generally helpful or not.
|
9
|
+
|
10
|
+
Run with:
|
11
|
+
python keyword_stats.py --n 50 # test 50 random tasks
|
12
|
+
Outputs a CSV "keyword_stats.csv" for easy inspection in Excel/Sheets.
|
13
|
+
"""
|
14
|
+
|
15
|
+
from __future__ import annotations
|
16
|
+
|
17
|
+
import argparse
|
18
|
+
import csv
|
19
|
+
import re
|
20
|
+
import random
|
21
|
+
import asyncio
|
22
|
+
from pathlib import Path
|
23
|
+
from synth_ai.environments.examples.enron.taskset import create_enron_taskset
|
24
|
+
from synth_ai.environments.examples.enron.art_helpers import email_search_tools # low-level search
|
25
|
+
|
26
|
+
# --- simple helpers ---------------------------------------------------------
|
27
|
+
# Lower-case words that are dropped from a question before searching.
STOPWORDS = {
    "the",
    "a",
    "an",
    "and",
    "or",
    "of",
    "at",
    "in",
    "on",
    "for",
    "to",
    "with",
    "my",
    "your",
    "our",
    "did",
    "do",
    "is",
    "was",
    "were",
    "be",
    "been",
    "am",
    "when",
    "what",
    "which",
    "who",
}

# A token is a run of ASCII letters, digits and apostrophes.
TOKEN_RE = re.compile(r"[A-Za-z0-9']+")


def extract_keywords(question: str) -> list[str]:
    """Return the lower-cased tokens of *question* that are not stop-words.

    Because ``TOKEN_RE`` admits apostrophes, a quoted word such as ``'budget'``
    or a lone ``'`` would otherwise come through as a keyword. Leading and
    trailing apostrophes are therefore stripped and empty results dropped;
    apostrophes inside a word (``john's``) are kept.

    Args:
        question: Natural-language question text.

    Returns:
        Keywords in their original order; duplicates are not removed.
    """
    keywords = []
    for raw_token in TOKEN_RE.findall(question):
        token = raw_token.strip("'").lower()
        # Stop-words are checked after stripping so "'the'" is filtered too.
        if token and token not in STOPWORDS:
            keywords.append(token)
    return keywords
|
64
|
+
|
65
|
+
|
66
|
+
# ---------------------------------------------------------------------------
|
67
|
+
async def main(n: int):
    """Sample *n* tasks, compare full vs. trimmed keyword hits, and write a CSV.

    For each sampled question the keyword list is searched once in full and
    once with its final keyword dropped; both hit counts are written to
    ``keyword_stats.csv`` in the current working directory.

    Args:
        n: Number of tasks to sample. Values above the task-set size are capped
            and negative values are treated as zero.
    """
    taskset = await create_enron_taskset()
    # Clamp to [0, len]: random.sample raises ValueError for a negative k.
    sample_size = max(0, min(n, len(taskset.instances)))
    sample = random.sample(taskset.instances, k=sample_size)

    # Fixed column order, so the header can be written even with no rows.
    fieldnames = [
        "question",
        "keywords_full",
        "hits_full",
        "keywords_trim",
        "hits_trim",
    ]

    rows: list[dict[str, str | int]] = []
    for inst in sample:
        q = inst.impetus.instructions
        kws_full = extract_keywords(q)
        if not kws_full:
            continue
        kws_trim = kws_full[:-1]

        # search using the low-level helper once so we don't need a whole env
        hits_full = email_search_tools.search_emails(
            inbox="user", keywords=kws_full, max_results=5
        )
        # A single-keyword question has nothing left after trimming.
        hits_trim = (
            email_search_tools.search_emails(
                inbox="user", keywords=kws_trim, max_results=5
            )
            if kws_trim
            else []
        )

        rows.append(
            {
                "question": q,
                "keywords_full": " ".join(kws_full),
                "hits_full": len(hits_full),
                "keywords_trim": " ".join(kws_trim),
                "hits_trim": len(hits_trim),
            }
        )

    # write CSV (header only when no rows were produced)
    out_path = Path("keyword_stats.csv")
    with out_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"Wrote {len(rows)} rows to {out_path.resolve()}")
|
105
|
+
|
106
|
+
|
107
|
+
if __name__ == "__main__":
    # Command-line entry point: --n sets how many tasks are sampled.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--n",
        type=int,
        default=30,
        help="number of tasks to sample",
    )
    options = cli.parse_args()
    asyncio.run(main(options.n))
|
@@ -0,0 +1,8 @@
|
|
1
|
+
import pytest
|
2
|
+
from synth_ai.environments.examples.enron.art_helpers.email_search_tools import search_emails
|
3
|
+
|
4
|
+
|
5
|
+
@pytest.mark.parametrize(
    "kw",
    [["enron"]],  # , ["meeting"], ["energy"]
)
def test_index_has_hits(kw):
    """The search helper returns at least one hit for each keyword list."""
    results = search_emails(inbox="john.lavorato@enron.com", keywords=kw)
    hit_count = len(results)
    assert hit_count > 0, f"no hits for {kw}"
|
@@ -0,0 +1,48 @@
|
|
1
|
+
"""MiniGrid environment example for synth_ai.
|
2
|
+
|
3
|
+
This module provides a comprehensive implementation of MiniGrid environments
|
4
|
+
with full state management, tool-based interaction, and task generation.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.examples.minigrid.engine import (
|
8
|
+
MiniGridEngine,
|
9
|
+
MiniGridPublicState,
|
10
|
+
MiniGridPrivateState,
|
11
|
+
MiniGridGoalReachedComponent,
|
12
|
+
MiniGridStepPenaltyComponent,
|
13
|
+
MiniGridObservationCallable,
|
14
|
+
MiniGridCheckpointObservationCallable,
|
15
|
+
)
|
16
|
+
from synth_ai.environments.examples.minigrid.environment import (
|
17
|
+
MiniGridEnvironment,
|
18
|
+
MiniGridInteractTool,
|
19
|
+
MiniGridActionInput,
|
20
|
+
)
|
21
|
+
from synth_ai.environments.examples.minigrid.taskset import (
|
22
|
+
MiniGridTaskInstance,
|
23
|
+
MiniGridTaskInstanceMetadata,
|
24
|
+
DEFAULT_MINIGRID_TASK,
|
25
|
+
create_minigrid_taskset,
|
26
|
+
taskset,
|
27
|
+
)
|
28
|
+
|
29
|
+
__all__ = [
|
30
|
+
# Engine
|
31
|
+
"MiniGridEngine",
|
32
|
+
"MiniGridPublicState",
|
33
|
+
"MiniGridPrivateState",
|
34
|
+
"MiniGridGoalReachedComponent",
|
35
|
+
"MiniGridStepPenaltyComponent",
|
36
|
+
"MiniGridObservationCallable",
|
37
|
+
"MiniGridCheckpointObservationCallable",
|
38
|
+
# Environment
|
39
|
+
"MiniGridEnvironment",
|
40
|
+
"MiniGridInteractTool",
|
41
|
+
"MiniGridActionInput",
|
42
|
+
# TaskSet
|
43
|
+
"MiniGridTaskInstance",
|
44
|
+
"MiniGridTaskInstanceMetadata",
|
45
|
+
"DEFAULT_MINIGRID_TASK",
|
46
|
+
"create_minigrid_taskset",
|
47
|
+
"taskset",
|
48
|
+
]
|