synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,165 @@
1
+ # environment.py
2
+ from __future__ import annotations
3
+ from typing import List, Optional, Dict, Any, Union
4
+ from pydantic import BaseModel, Field
5
+
6
+ from synth_ai.environments.environment.tools import (
7
+ EnvToolCall,
8
+ ToolResult,
9
+ TOOL_REGISTRY,
10
+ register_tool,
11
+ )
12
+ from synth_ai.environments.environment.shared_engine import (
13
+ GetObservationCallable,
14
+ InternalObservation,
15
+ )
16
+ from synth_ai.environments.stateful.core import StatefulEnvironment
17
+ from synth_ai.environments.examples.enron.engine import (
18
+ EnronEngine,
19
+ ACTION_SEARCH,
20
+ ACTION_READ,
21
+ ACTION_ANSWER,
22
+ )
23
+ from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
24
+
25
+
26
+ # -------- pydantic schemas (used by agent / LLM function calls)
27
class SearchEmailsArgs(BaseModel):
    # Argument schema for a search-emails tool call.
    # Plain comments are used instead of a class docstring so that the
    # schema generated from this model keeps its current description.

    # Mailbox on whose behalf the search is performed.
    inbox: str = Field(
        ...,
        description="Email address performing the search (used by tool logic)",
    )
    # Search terms; per the field description they are AND-combined.
    keywords: List[str] = Field(
        ...,
        description="Keywords to AND-search for",
    )
    # Optional filters, all defaulting to None.
    # NOTE(review): expected date format of sent_after / sent_before is not
    # visible here — confirm against the search helper.
    from_addr: Optional[str] = None
    to_addr: Optional[str] = None
    sent_after: Optional[str] = None
    sent_before: Optional[str] = None
    # Defaults to 10 and may not exceed 10.
    max_results: int = Field(10, le=10)
35
+
36
+
37
class ReadEmailArgs(BaseModel):
    # Argument schema for a read-email tool call.
    # (Comment rather than docstring: keeps the generated schema unchanged.)

    # Identifier of the message to load.
    message_id: str
39
+
40
+
41
class AnswerQuestionArgs(BaseModel):
    # Argument schema for an answer-question tool call.
    # (Comment rather than docstring: keeps the generated schema unchanged.)

    # The agent's answer text.
    answer: str
43
+
44
+
45
+ # --------------------------------------------------------------------------- tool wrappers
46
class SearchEmails(EnvToolCall):
    # Tool-call wrapper carrying a search request.
    # NOTE(review): only ``action`` is set and the base initialiser is not
    # invoked, while EnronEnvironment.step reads ``.tool`` from the call —
    # confirm that EnvToolCall supplies that attribute.

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched as the action payload.
        search_action = (ACTION_SEARCH, kwargs)
        self.action = search_action
49
+
50
+
51
class ReadEmail(EnvToolCall):
    # Tool-call wrapper carrying a request to read one message.

    def __init__(self, message_id: str):
        # The action is the pair (ACTION_READ, message id).
        read_action = (ACTION_READ, message_id)
        self.action = read_action
54
+
55
+
56
class AnswerQuestion(EnvToolCall):
    # Tool-call wrapper carrying the agent's answer.

    def __init__(self, answer: str):
        # The action is the pair (ACTION_ANSWER, answer text).
        answer_action = (ACTION_ANSWER, answer)
        self.action = answer_action
59
+
60
+
61
+ # -- terminate wrapper (maps to an empty-answer ACTION_ANSWER) --------------
62
class Terminate(EnvToolCall):
    # Tool-call wrapper that ends the episode by submitting an empty answer.

    def __init__(self):
        # Same action kind as AnswerQuestion, with "" as the answer.
        empty_answer_action = (ACTION_ANSWER, "")
        self.action = empty_answer_action
65
+
66
+
67
+ # -------- observation callable (optional for formatted observations)
68
class SynthEnronObservationCallable(GetObservationCallable):
    """Observation formatter that renders state as a multi-line text summary."""

    async def get_observation(
        self, pub: Dict[str, Any], priv: Dict[str, Any]
    ) -> InternalObservation:
        """Return a human-readable summary of ``pub`` plus the last reward.

        Missing keys are rendered as ``None`` (or as zero items for the
        search results), because every lookup goes through ``dict.get``.
        """
        q = pub.get("question")
        rwd = priv.get("reward_last")
        # One entry per output line; joined with newlines below.
        report = [
            f"Q: {q}",
            f"Tools: {pub.get('tools')}",
            f"Answered: {pub.get('already_answered')}",
            f"Search Res: {len(pub.get('search_results', []))} items",
            f"Email Loaded: {pub.get('email') is not None}",
            f"Tool Error: {pub.get('tool_error')}",
            f"Reward Δ: {rwd}",
        ]
        return "\n".join(report)
76
+
77
+
78
+ # --------------------------------------------------------------------------- environment
79
class EnronEnvironment(StatefulEnvironment):
    """Question-answering environment driven by an ``EnronEngine``.

    A tool call is resolved to a tool object, executed, and its result is
    handed to the engine; the resulting private/public state is then rendered
    through the configured observation callable.
    """

    def __init__(
        self,
        task_instance: EnronTaskInstance,
        custom_obs: Optional[GetObservationCallable] = None,
    ):
        """Create the engine, the observation formatter and the tool objects.

        Args:
            task_instance: Task the engine is built from.
            custom_obs: Observation formatter; a
                ``SynthEnronObservationCallable`` is used when omitted.
        """
        self.engine = EnronEngine(task_instance)
        self.custom_obs = custom_obs or SynthEnronObservationCallable()
        self.name = "Enron-QA-Env"

        # Tool objects are kept on the instance so that step() can find them
        # without relying on the global registry.
        # NOTE(review): the four *Tool classes used here are neither defined
        # nor imported in the visible part of this module — confirm their
        # origin.
        self._tools_instances = {
            "search_emails": SearchEmailsTool(self.engine),
            "read_email": ReadEmailTool(self.engine),
            "answer_question": AnswerQuestionTool(self.engine),
            "terminate": TerminateTool(self.engine),
        }
        # (Re-)register a tool when its name is unknown to the registry or
        # when the registered tool is bound to a different engine.
        for tool_name, tool_instance in self._tools_instances.items():
            if (
                tool_name not in TOOL_REGISTRY
                or TOOL_REGISTRY[tool_name].engine is not self.engine
            ):
                register_tool(tool_instance)

    async def initialize(self) -> InternalObservation:
        """Reset the engine and return the first observation."""
        priv, pub = await self.engine._reset_engine()
        return await self._obs(priv, pub)

    async def step(
        self,
        calls: Union[EnvToolCall, List[EnvToolCall], List[List[EnvToolCall]]],
    ) -> InternalObservation:
        """Execute the first tool call in ``calls`` and advance the engine.

        Args:
            calls: A single call, a list of calls, or a list of lists of
                calls. Only the element at ``[0][0]`` after normalisation is
                executed.

        Raises:
            TypeError: If the selected element is not an ``EnvToolCall``.
            ValueError: If no tool is found under the call's ``tool`` name.
        """
        # Normalise every accepted shape to [[EnvToolCall]].
        if isinstance(calls, EnvToolCall):
            calls = [[calls]]
        elif calls and isinstance(calls[0], EnvToolCall):
            calls = [calls]

        first_call = calls[0][0]
        if not isinstance(first_call, EnvToolCall):
            raise TypeError(f"Processed call is not EnvToolCall: {type(first_call)}")

        tool_name = first_call.tool
        # Instance-local tools take precedence; the registry is the fallback.
        tool = self._tools_instances.get(tool_name) or TOOL_REGISTRY.get(tool_name)
        if not tool:
            raise ValueError(f"Tool '{tool_name}' not found.")

        tool_result: ToolResult = await tool(first_call)

        if tool_result.ok:
            # A missing or empty payload becomes an empty dict.
            engine_payload = tool_result.payload or {}
        else:
            # On failure the engine only receives the error.
            engine_payload = {"tool_error": tool_result.error}

        priv, pub = await self.engine._step_engine(engine_payload)
        return await self._obs(priv, pub)

    async def terminate(self) -> InternalObservation:
        """Close the engine's database and return a final observation."""
        self.engine.close_db()
        final_priv = dict(
            reward_last=0,
            total_reward=self.engine.total_reward,
            terminated=True,
            truncated=False,
            gold_answer=self.engine._sample()["answer"],
        )
        final_pub = dict(
            question=self.engine._sample()["question"],
            tools=[],
            already_answered=self.engine.answered,
            status="terminated_by_env",
        )
        return await self._obs(final_priv, final_pub)

    async def checkpoint(self) -> InternalObservation:
        """Serialise the engine and return the snapshot in a small dict."""
        snapshot = await self.engine._serialize_engine()
        return {
            "engine_snapshot": snapshot.model_dump(),
            "message": "Checkpoint created",
        }

    async def _obs(self, priv: Dict[str, Any], pub: Dict[str, Any]):
        """Render state via ``custom_obs``; fall back to a merged dict."""
        if not self.custom_obs:
            return {**pub, **priv}
        return await self.custom_obs.get_observation(pub, priv)
@@ -0,0 +1,112 @@
1
+ # taskset.py
2
+ from __future__ import annotations
3
+ import asyncio
4
+ from uuid import uuid4
5
+ import os
6
+
7
+ from datasets import load_dataset
8
+ from dataclasses import dataclass, asdict
9
+
10
+ from synth_ai.environments.tasks.core import (
11
+ Task,
12
+ TaskInstance,
13
+ TaskInstanceSet,
14
+ TaskInstanceMetadata,
15
+ SplitInfo,
16
+ Impetus,
17
+ Intent,
18
+ )
19
+
20
+ enron_task = Task(
21
+ global_premises="Answer factual questions by reading Enron e-mails",
22
+ global_constraints="",
23
+ global_objectives="Provide the correct answer; minimise queries",
24
+ shared_env_params={},
25
+ )
26
+
27
+
28
+ # --------------------------------------------------------------------------- metadata
29
@dataclass
class EnronTaskInstanceMetadata(TaskInstanceMetadata):
    # Metadata attached to each Enron QA task instance.

    # Name of the dataset split the instance was built from.
    split: str
    # Set by create_enron_taskset to len(row["message_ids"]).
    email_count: int
    # Message ids copied from the dataset row.
    message_ids: list[str]
34
+
35
+
36
@dataclass
class EnronTaskInstance(TaskInstance):
    """Task instance for the Enron QA environment with dict (de)serialisation."""

    async def serialize(self):
        """Return the instance as a plain dict.

        The dataclass is converted with ``dataclasses.asdict``; when the
        resulting ``id`` entry is a ``uuid.UUID`` it is replaced by its
        string form.
        """
        # Local import: the module only imports the ``uuid4`` factory.
        # Checking against the class directly avoids generating a throwaway
        # random UUID on every call just to obtain its type.
        from uuid import UUID

        data = asdict(self)
        if isinstance(data.get("id"), UUID):
            data["id"] = str(data["id"])
        return data

    @classmethod
    async def deserialize(cls, data: dict) -> "EnronTaskInstance":
        """Build an instance by passing ``data`` as keyword arguments.

        NOTE(review): values are passed through unchanged, so a stringified
        ``id`` or nested dicts produced by ``serialize`` are not converted
        back to their original types — confirm callers expect that.
        """
        return cls(**data)
47
+
48
+
49
+ # --------------------------------------------------------------------------- task-set builder
50
+ # Use a local dataset cache under examples/enron/dataset
51
+ CACHE_DIR = os.path.join(os.path.dirname(__file__), "dataset")
52
+ os.makedirs(CACHE_DIR, exist_ok=True)
53
+
54
+
55
async def create_enron_taskset() -> TaskInstanceSet:
    """Build the Enron QA task set from the dataset's train and test splits.

    Both splits of ``corbt/enron_emails_sample_questions`` are loaded with
    ``CACHE_DIR`` as cache directory. Every row becomes one
    ``EnronTaskInstance``; the ids of the test instances are recorded in the
    split info, and the validation id set is left empty.
    """
    dataset_id = "corbt/enron_emails_sample_questions"

    def to_instance(row: dict, split: str) -> EnronTaskInstance:
        # The question is the instruction; the answer is the gold state diff.
        return EnronTaskInstance(
            impetus=Impetus(instructions=row["question"]),
            intent=Intent(
                rubric={"goal": "Answer the question using the Enron emails."},
                gold_trajectories=None,
                gold_state_diff={"answer": row["answer"]},
            ),
            metadata=EnronTaskInstanceMetadata(
                split=split,
                email_count=len(row["message_ids"]),
                message_ids=row["message_ids"],
            ),
            is_reproducible=True,
            initial_engine_snapshot=row,
            id=uuid4(),
        )

    # Load both splits first (train, then test), then convert the rows.
    rows_by_split = {
        split_name: load_dataset(dataset_id, split=split_name, cache_dir=CACHE_DIR)
        for split_name in ("train", "test")
    }
    built = {
        split_name: [to_instance(row, split_name) for row in rows]
        for split_name, rows in rows_by_split.items()
    }

    test_ids = {instance.id for instance in built["test"]}
    return TaskInstanceSet(
        name="Enron-QA",
        description="QA over Enron email dataset sample.",
        instances=built["train"] + built["test"],
        split_info=SplitInfo(
            val_instance_ids=set(),
            test_instance_ids=test_ids,
            _is_split_defined=True,
        ),
    )
103
+
104
+
105
+ # quick sanity check ----------------------------------------------------------
106
if __name__ == "__main__":

    async def _build_and_report():
        # Build the full task set and print how many instances it holds.
        built_set = await create_enron_taskset()
        print(f"{len(built_set.instances)} instances built.")

    asyncio.run(_build_and_report())
@@ -0,0 +1,111 @@
1
+ """
2
+ Script: keyword_stats.py
3
+ Purpose: Iterate over a sample of Enron-QA tasks and compare the hit-rate of the
4
+ full keyword list extracted from the natural-language question with the hit-rate
5
+ when the *final* keyword is dropped (the heuristic your current agent uses).
6
+
7
+ It logs the result counts side-by-side so you can see whether the heuristic is
8
+ generally helpful or not.
9
+
10
+ Run with:
11
+ python keyword_stats.py --n 50 # test 50 random tasks
12
+ Outputs a CSV "keyword_stats.csv" for easy inspection in Excel/Sheets.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import csv
19
+ import re
20
+ import random
21
+ import asyncio
22
+ from pathlib import Path
23
+ from synth_ai.environments.examples.enron.taskset import create_enron_taskset
24
+ from synth_ai.environments.examples.enron.art_helpers import email_search_tools # low-level search
25
+
26
+ # --- simple helpers ---------------------------------------------------------
27
+ STOPWORDS = {
28
+ "the",
29
+ "a",
30
+ "an",
31
+ "and",
32
+ "or",
33
+ "of",
34
+ "at",
35
+ "in",
36
+ "on",
37
+ "for",
38
+ "to",
39
+ "with",
40
+ "my",
41
+ "your",
42
+ "our",
43
+ "did",
44
+ "do",
45
+ "is",
46
+ "was",
47
+ "were",
48
+ "be",
49
+ "been",
50
+ "am",
51
+ "when",
52
+ "what",
53
+ "which",
54
+ "who",
55
+ }
56
+
57
+ TOKEN_RE = re.compile(r"[A-Za-z0-9']+")
58
+
59
+
60
def extract_keywords(question: str) -> list[str]:
    """Return the lower-cased tokens of ``question`` that are not stop-words.

    Tokens are the matches of ``TOKEN_RE``; order and duplicates are kept.
    """
    lowered = (match.lower() for match in TOKEN_RE.findall(question))
    return [word for word in lowered if word not in STOPWORDS]
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
async def main(n: int):
    """Sample up to ``n`` tasks and write keyword hit counts to a CSV file.

    For every sampled question the extracted keywords are searched twice:
    once in full and once with the final keyword dropped. The two result
    counts are recorded side by side in ``keyword_stats.csv`` in the current
    working directory. Questions that yield no keywords are skipped.

    Args:
        n: Maximum number of task instances to sample.
    """
    # Fixed header so the CSV can be written even when no row was collected.
    # Deriving it from rows[0] raised IndexError on an empty run (n == 0, or
    # every sampled question consisting of stop-words only).
    fieldnames = [
        "question",
        "keywords_full",
        "hits_full",
        "keywords_trim",
        "hits_trim",
    ]

    taskset = await create_enron_taskset()
    sample = random.sample(taskset.instances, k=min(n, len(taskset.instances)))

    rows: list[dict[str, str | int]] = []
    for inst in sample:
        q = inst.impetus.instructions
        kws_full = extract_keywords(q)
        if not kws_full:
            continue
        kws_trim = kws_full[:-1]

        # Use the low-level search helper directly so no environment is needed.
        hits_full = email_search_tools.search_emails(
            inbox="user", keywords=kws_full, max_results=5
        )
        # With a single keyword there is nothing left after trimming, so the
        # trimmed search is skipped and counted as zero hits.
        hits_trim = (
            email_search_tools.search_emails(
                inbox="user", keywords=kws_trim, max_results=5
            )
            if len(kws_full) > 1
            else []
        )

        rows.append(
            {
                "question": q,
                "keywords_full": " ".join(kws_full),
                "hits_full": len(hits_full),
                "keywords_trim": " ".join(kws_trim),
                "hits_trim": len(hits_trim),
            }
        )

    out_path = Path("keyword_stats.csv")
    with out_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"Wrote {len(rows)} rows to {out_path.resolve()}")
105
+
106
+
107
if __name__ == "__main__":
    # Command-line entry point: --n selects how many tasks are sampled.
    cli = argparse.ArgumentParser()
    cli.add_argument("--n", type=int, default=30, help="number of tasks to sample")
    options = cli.parse_args()
    asyncio.run(main(options.n))
@@ -0,0 +1,8 @@
1
+ import pytest
2
+ from synth_ai.environments.examples.enron.art_helpers.email_search_tools import search_emails
3
+
4
+
5
@pytest.mark.parametrize("kw", [["enron"]])  # , ["meeting"], ["energy"]
def test_index_has_hits(kw):
    """Searching a known inbox for the given keywords returns at least one hit."""
    found = search_emails(inbox="john.lavorato@enron.com", keywords=kw)
    hit_count = len(found)
    assert hit_count > 0, f"no hits for {kw}"
@@ -0,0 +1,48 @@
1
+ """MiniGrid environment example for synth_env.
2
+
3
+ This module provides a comprehensive implementation of MiniGrid environments
4
+ with full state management, tool-based interaction, and task generation.
5
+ """
6
+
7
+ from synth_ai.environments.examples.minigrid.engine import (
8
+ MiniGridEngine,
9
+ MiniGridPublicState,
10
+ MiniGridPrivateState,
11
+ MiniGridGoalReachedComponent,
12
+ MiniGridStepPenaltyComponent,
13
+ MiniGridObservationCallable,
14
+ MiniGridCheckpointObservationCallable,
15
+ )
16
+ from synth_ai.environments.examples.minigrid.environment import (
17
+ MiniGridEnvironment,
18
+ MiniGridInteractTool,
19
+ MiniGridActionInput,
20
+ )
21
+ from synth_ai.environments.examples.minigrid.taskset import (
22
+ MiniGridTaskInstance,
23
+ MiniGridTaskInstanceMetadata,
24
+ DEFAULT_MINIGRID_TASK,
25
+ create_minigrid_taskset,
26
+ taskset,
27
+ )
28
+
29
+ __all__ = [
30
+ # Engine
31
+ "MiniGridEngine",
32
+ "MiniGridPublicState",
33
+ "MiniGridPrivateState",
34
+ "MiniGridGoalReachedComponent",
35
+ "MiniGridStepPenaltyComponent",
36
+ "MiniGridObservationCallable",
37
+ "MiniGridCheckpointObservationCallable",
38
+ # Environment
39
+ "MiniGridEnvironment",
40
+ "MiniGridInteractTool",
41
+ "MiniGridActionInput",
42
+ # TaskSet
43
+ "MiniGridTaskInstance",
44
+ "MiniGridTaskInstanceMetadata",
45
+ "DEFAULT_MINIGRID_TASK",
46
+ "create_minigrid_taskset",
47
+ "taskset",
48
+ ]