synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. synth_ai/__init__.py +28 -2
  2. synth_ai/core/system.py +4 -0
  3. synth_ai/environments/__init__.py +35 -0
  4. synth_ai/environments/environment/__init__.py +1 -0
  5. synth_ai/environments/environment/artifacts/__init__.py +1 -0
  6. synth_ai/environments/environment/artifacts/base.py +50 -0
  7. synth_ai/environments/environment/core.py +22 -0
  8. synth_ai/environments/environment/db/__init__.py +1 -0
  9. synth_ai/environments/environment/db/sqlite.py +45 -0
  10. synth_ai/environments/environment/registry.py +24 -0
  11. synth_ai/environments/environment/resources/sqlite.py +46 -0
  12. synth_ai/environments/environment/results.py +1 -0
  13. synth_ai/environments/environment/rewards/__init__.py +1 -0
  14. synth_ai/environments/environment/rewards/core.py +28 -0
  15. synth_ai/environments/environment/shared_engine.py +26 -0
  16. synth_ai/environments/environment/tools/__init__.py +34 -0
  17. synth_ai/environments/examples/__init__.py +1 -0
  18. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
  26. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  27. synth_ai/environments/examples/crafter_classic/engine.py +502 -0
  28. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  29. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  30. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  31. synth_ai/environments/examples/crafter_classic/environment.py +255 -0
  32. synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
  33. synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
  34. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  35. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  36. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  37. synth_ai/environments/examples/enron/engine.py +291 -0
  38. synth_ai/environments/examples/enron/environment.py +165 -0
  39. synth_ai/environments/examples/enron/taskset.py +112 -0
  40. synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
  41. synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
  42. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  43. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  44. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
  45. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  46. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
  47. synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
  48. synth_ai/environments/examples/minigrid/engine.py +589 -0
  49. synth_ai/environments/examples/minigrid/environment.py +274 -0
  50. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  51. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  52. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  53. synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
  54. synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
  55. synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
  56. synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
  57. synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
  58. synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
  59. synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
  60. synth_ai/environments/examples/nethack/__init__.py +7 -0
  61. synth_ai/environments/examples/nethack/achievements.py +337 -0
  62. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  63. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  64. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
  65. synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
  66. synth_ai/environments/examples/nethack/engine.py +738 -0
  67. synth_ai/environments/examples/nethack/environment.py +255 -0
  68. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  69. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  70. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  71. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  72. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  73. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  74. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  75. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  76. synth_ai/environments/examples/nethack/taskset.py +323 -0
  77. synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
  78. synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
  79. synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
  80. synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
  81. synth_ai/environments/examples/red/__init__.py +7 -0
  82. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  83. synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
  84. synth_ai/environments/examples/red/config_logging.py +110 -0
  85. synth_ai/environments/examples/red/engine.py +693 -0
  86. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  87. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  88. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  89. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  90. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  91. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  92. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  93. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  94. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  95. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  96. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  97. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  98. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  99. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  100. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  101. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  102. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  103. synth_ai/environments/examples/red/environment.py +235 -0
  104. synth_ai/environments/examples/red/taskset.py +77 -0
  105. synth_ai/environments/examples/red/test_fixes.py +125 -0
  106. synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
  107. synth_ai/environments/examples/red/units/__init__.py +1 -0
  108. synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
  109. synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
  110. synth_ai/environments/examples/red/units/test_engine.py +192 -0
  111. synth_ai/environments/examples/red/units/test_environment.py +455 -0
  112. synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
  113. synth_ai/environments/examples/red/units/test_integration.py +217 -0
  114. synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
  115. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
  116. synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
  117. synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
  118. synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
  119. synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
  120. synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
  121. synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
  122. synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
  123. synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
  124. synth_ai/environments/examples/red/units/test_taskset.py +116 -0
  125. synth_ai/environments/examples/red/units/test_tree.py +448 -0
  126. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  127. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
  128. synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
  129. synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
  130. synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
  131. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
  132. synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
  133. synth_ai/environments/examples/sokoban/engine.py +675 -0
  134. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  135. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  136. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  137. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  138. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  139. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  140. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  141. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  142. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  143. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  144. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  145. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  146. synth_ai/environments/examples/sokoban/environment.py +228 -0
  147. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  148. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  149. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  150. synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
  151. synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
  152. synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
  153. synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
  154. synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
  155. synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
  156. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  157. synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
  158. synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
  159. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  160. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  161. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  162. synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
  163. synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
  164. synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
  165. synth_ai/environments/examples/verilog/__init__.py +10 -0
  166. synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
  167. synth_ai/environments/examples/verilog/engine.py +328 -0
  168. synth_ai/environments/examples/verilog/environment.py +349 -0
  169. synth_ai/environments/examples/verilog/taskset.py +418 -0
  170. synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
  171. synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
  172. synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
  173. synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
  174. synth_ai/environments/reproducibility/core.py +42 -0
  175. synth_ai/environments/reproducibility/tree.py +364 -0
  176. synth_ai/environments/service/app.py +78 -0
  177. synth_ai/environments/service/core_routes.py +775 -0
  178. synth_ai/environments/service/external_registry.py +57 -0
  179. synth_ai/environments/service/registry.py +9 -0
  180. synth_ai/environments/stateful/__init__.py +1 -0
  181. synth_ai/environments/stateful/core.py +28 -0
  182. synth_ai/environments/stateful/engine.py +21 -0
  183. synth_ai/environments/stateful/state.py +7 -0
  184. synth_ai/environments/tasks/api.py +19 -0
  185. synth_ai/environments/tasks/core.py +78 -0
  186. synth_ai/environments/tasks/filters.py +39 -0
  187. synth_ai/environments/tasks/utils.py +89 -0
  188. synth_ai/environments/v0_observability/history.py +3 -0
  189. synth_ai/environments/v0_observability/log.py +2 -0
  190. synth_ai/lm/caching/constants.py +1 -0
  191. synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
  192. synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
  193. synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
  194. synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
  195. synth_ai/{zyk/lms → lm}/config.py +2 -1
  196. synth_ai/{zyk/lms → lm}/constants.py +2 -2
  197. synth_ai/{zyk/lms → lm}/core/all.py +10 -10
  198. synth_ai/{zyk/lms → lm}/core/main.py +57 -33
  199. synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
  200. synth_ai/lm/cost/monitor.py +1 -0
  201. synth_ai/lm/cost/statefulness.py +1 -0
  202. synth_ai/lm/provider_support/__init__.py +8 -0
  203. synth_ai/lm/provider_support/anthropic.py +945 -0
  204. synth_ai/lm/provider_support/openai.py +1115 -0
  205. synth_ai/lm/provider_support/suppress_logging.py +31 -0
  206. synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
  207. synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
  208. synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
  209. synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
  210. synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
  211. synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
  212. synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
  213. synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
  214. synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
  215. synth_ai/lm/vendors/supported/__init__.py +0 -0
  216. synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
  217. synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
  218. synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
  219. synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
  220. synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
  221. synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
  222. synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
  223. synth_ai/tracing/__init__.py +0 -0
  224. synth_ai/tracing/abstractions.py +224 -0
  225. synth_ai/tracing/base_client.py +91 -0
  226. synth_ai/tracing/client_manager.py +131 -0
  227. synth_ai/tracing/config.py +140 -0
  228. synth_ai/tracing/context.py +146 -0
  229. synth_ai/tracing/decorators.py +679 -0
  230. synth_ai/tracing/events/__init__.py +0 -0
  231. synth_ai/tracing/events/manage.py +147 -0
  232. synth_ai/tracing/events/scope.py +86 -0
  233. synth_ai/tracing/events/store.py +227 -0
  234. synth_ai/tracing/immediate_client.py +152 -0
  235. synth_ai/tracing/local.py +18 -0
  236. synth_ai/tracing/log_client_base.py +74 -0
  237. synth_ai/tracing/retry_queue.py +187 -0
  238. synth_ai/tracing/trackers.py +515 -0
  239. synth_ai/tracing/upload.py +504 -0
  240. synth_ai/tracing/utils.py +9 -0
  241. synth_ai/zyk/__init__.py +28 -2
  242. synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
  243. synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
  244. synth_ai/zyk/lms/caching/constants.py +0 -1
  245. synth_ai/zyk/lms/cost/monitor.py +0 -1
  246. synth_ai/zyk/lms/cost/statefulness.py +0 -1
  247. synth_ai-0.1.9.dist-info/METADATA +0 -37
  248. synth_ai-0.1.9.dist-info/RECORD +0 -50
  249. /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
  250. /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
  251. /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
  252. /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
  253. /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
  254. /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
  255. /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
  256. /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
  257. /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
  258. /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
  259. /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
  260. /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
  261. /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
  262. /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
  263. /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
  264. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
  265. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
  266. {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,214 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from dataclasses import dataclass
5
+ from typing import List
6
+ import numpy as np
7
+
8
+ from uuid import uuid4
9
+ from synth_ai.environments.tasks.core import (
10
+ TaskInstance,
11
+ TaskInstanceMetadata,
12
+ TaskInstanceSet,
13
+ Impetus,
14
+ Intent,
15
+ SplitInfo,
16
+ )
17
+
18
+ from .engine import COORD_TO_IDX, WIN_PATTERNS, PLAYER_MARKS
19
+
20
+
21
@dataclass
class TicTacToeTaskInstanceMetadata(TaskInstanceMetadata):
    """Descriptive attributes attached to one TicTacToe task instance."""

    # Mark the agent plays: "X" or "O".
    starting_player: str
    # Pre-made moves (cell coordinates, in play order) that create the position.
    opening_moves: List[str]
    # Expected result for the starting player: "win", "draw" or "loss".
    optimal_outcome: str
    # Number of pre-moves that were made.
    position_complexity: int
    # Estimated minimum number of moves to force the win/draw.
    shortest_win_length: int
28
+
29
+
30
@dataclass
class TicTacToeTaskInstance(TaskInstance):
    """TicTacToe task instance that can round-trip through a plain dict."""

    async def serialize(self) -> dict:
        """Return a dict snapshot of this instance.

        The id is stringified; impetus, intent and metadata are flattened into
        nested dicts keyed by their attribute names.
        """
        payload = {"id": str(self.id)}
        payload["impetus"] = {"instructions": self.impetus.instructions}

        intent = self.intent
        payload["intent"] = {
            "rubric": intent.rubric,
            "gold_trajectories": intent.gold_trajectories,
            "gold_state_diff": intent.gold_state_diff,
        }

        meta = self.metadata
        payload["metadata"] = {
            "starting_player": meta.starting_player,
            "opening_moves": meta.opening_moves,
            "optimal_outcome": meta.optimal_outcome,
            "position_complexity": meta.position_complexity,
            "shortest_win_length": meta.shortest_win_length,
        }

        payload["is_reproducible"] = self.is_reproducible
        payload["initial_engine_snapshot"] = self.initial_engine_snapshot
        return payload

    @classmethod
    async def deserialize(cls, data: dict) -> "TicTacToeTaskInstance":
        """Rebuild an instance from a dict shaped like the output of ``serialize``.

        Raises ``KeyError`` when an expected key is absent from ``data``.
        """
        from uuid import UUID

        raw_meta = data["metadata"]
        metadata = TicTacToeTaskInstanceMetadata(
            starting_player=raw_meta["starting_player"],
            opening_moves=raw_meta["opening_moves"],
            optimal_outcome=raw_meta["optimal_outcome"],
            position_complexity=raw_meta["position_complexity"],
            shortest_win_length=raw_meta["shortest_win_length"],
        )

        instance_id = UUID(data["id"])
        impetus = Impetus(instructions=data["impetus"]["instructions"])

        raw_intent = data["intent"]
        intent = Intent(
            rubric=raw_intent["rubric"],
            gold_trajectories=raw_intent["gold_trajectories"],
            gold_state_diff=raw_intent["gold_state_diff"],
        )

        return cls(
            id=instance_id,
            impetus=impetus,
            intent=intent,
            metadata=metadata,
            is_reproducible=data["is_reproducible"],
            initial_engine_snapshot=data["initial_engine_snapshot"],
        )
76
+
77
+
78
def _evaluate_position(board: np.ndarray, player: int) -> str:
    """Return the outcome of ``board`` for ``player`` under perfect play.

    The previous version reported "draw" for every non-terminal position,
    which mislabels positions where one side has a forced win. This version
    runs an exhaustive, memoised minimax search instead.

    Args:
        board: The nine cells of the grid; 0 marks an empty cell.
        player: Mark of the side to evaluate for. It is treated as the side
            to move. The opponent's mark is taken to be ``3 - player``, as in
            the original terminal check (i.e. marks are assumed to be 1 and 2).

    Returns:
        "win", "draw" or "loss" from ``player``'s point of view.
    """
    from functools import lru_cache

    win_lines = tuple(tuple(line) for line in WIN_PATTERNS)

    @lru_cache(maxsize=None)
    def _solve(cells: tuple, to_move: int) -> int:
        """Score ``cells`` for ``to_move``: +1 win, 0 draw, -1 loss."""
        opponent = 3 - to_move

        # Terminal check, in the same order as the original implementation.
        for line in win_lines:
            marks = [cells[i] for i in line]
            if marks.count(to_move) == 3:
                return 1
            if marks.count(opponent) == 3:
                return -1
        if 0 not in cells:
            return 0

        # Negamax: the mover's best result is the worst result for the opponent.
        best = -1
        for idx, mark in enumerate(cells):
            if mark != 0:
                continue
            child = cells[:idx] + (to_move,) + cells[idx + 1:]
            score = -_solve(child, opponent)
            if score > best:
                best = score
                if best == 1:
                    break  # cannot do better than a forced win
        return best

    # Hashable snapshot of the board so positions can be memoised.
    start = tuple(int(value) for value in np.asarray(board).ravel())
    return {1: "win", 0: "draw", -1: "loss"}[_solve(start, int(player))]
95
+
96
+
97
def _count_shortest_win(board: np.ndarray, player: int) -> int:
    """Estimate how many moves are needed to force a win/draw.

    This is a heuristic, not a search: it returns half of the empty cells
    (rounded down), with a floor of 1. ``player`` is accepted but not used.
    """
    vacant = 0
    for idx in range(9):
        if board[idx] == 0:
            vacant += 1

    half = vacant // 2
    return half if half > 1 else 1
102
+
103
+
104
async def create_tictactoe_taskset() -> TaskInstanceSet:
    """Build the procedural TicTacToe task set.

    For each tier in the configuration table, a number of positions are
    created by playing random pre-moves (X first, sides alternating) on an
    empty board. Each position becomes one task instance whose metadata
    records the side to move, the pre-moves and the evaluated outcome.
    Instances are shuffled, then split: one pre-move -> validation, two or
    more pre-moves -> test.
    """
    # Per tier: how many random moves are pre-played and how many tasks to emit.
    POSITION_CONFIGS = {
        "opening": {"pre_moves": 0, "count": 10},  # Fresh games
        "early": {"pre_moves": 1, "count": 15},  # After 1 move
        "mid": {"pre_moves": 2, "count": 15},  # After 2 moves
        "complex": {"pre_moves": 3, "count": 10},  # After 3 moves
    }

    coord_pool = list(COORD_TO_IDX.keys())
    instances = []

    for tier in POSITION_CONFIGS.values():
        n_pre_moves = tier["pre_moves"]

        for _ in range(tier["count"]):
            opening_moves = []
            grid = np.zeros(9, dtype=int)
            side_to_move = "X"
            remaining = list(coord_pool)

            # Play the random pre-moves, alternating sides.
            for _step in range(n_pre_moves):
                if len(remaining) == 0:
                    break

                pick = random.choice(remaining)
                opening_moves.append(pick)
                remaining.remove(pick)

                grid[COORD_TO_IDX[pick]] = PLAYER_MARKS[side_to_move]
                if side_to_move == "X":
                    side_to_move = "O"
                else:
                    side_to_move = "X"

            # Whoever is next to move is the side the agent plays.
            starting_player = side_to_move
            mover_mark = PLAYER_MARKS[starting_player]
            optimal_outcome = _evaluate_position(grid, mover_mark)
            shortest_win = _count_shortest_win(grid, mover_mark)

            metadata = TicTacToeTaskInstanceMetadata(
                starting_player=starting_player,
                opening_moves=opening_moves,
                optimal_outcome=optimal_outcome,
                position_complexity=n_pre_moves,
                shortest_win_length=shortest_win,
            )

            if opening_moves:
                history_note = f"The game has already had {len(opening_moves)} moves."
            else:
                history_note = "This is a fresh game."

            instruction_parts = (
                f"You are playing TicTacToe as {starting_player}. ",
                "The game is played on a 3x3 grid with cells labeled A1-A3, B1-B3, C1-C3. ",
                history_note,
                f" You must place your mark ({starting_player}) in an empty cell. ",
                "Win by getting three of your marks in a row (horizontally, vertically, or diagonally).",
            )
            impetus = Impetus(instructions="".join(instruction_parts))

            intent = Intent(
                rubric={"goal": f"Win the game as {starting_player}, or at least force a draw"},
                gold_trajectories=None,
                gold_state_diff={"optimal_outcome": optimal_outcome},
            )

            instances.append(
                TicTacToeTaskInstance(
                    id=uuid4(),
                    impetus=impetus,
                    intent=intent,
                    metadata=metadata,
                    is_reproducible=True,
                    initial_engine_snapshot=None,
                )
            )

    random.shuffle(instances)

    # Complexity-based splits: 1 pre-move -> validation, 2+ pre-moves -> test.
    val_ids = set()
    test_ids = set()
    for task in instances:
        depth = task.metadata.position_complexity
        if depth == 1:
            val_ids.add(task.id)
        if depth >= 2:
            test_ids.add(task.id)

    # Fallback when either split came out empty: slice the shuffled list.
    if not val_ids or not test_ids:
        total = len(instances)
        val_end = int(total * 0.15)
        test_end = int(total * 0.30)
        val_ids = {task.id for task in instances[:val_end]}
        test_ids = {task.id for task in instances[val_end:test_end]}

    split_info = SplitInfo(
        val_instance_ids=val_ids,
        test_instance_ids=test_ids,
        _is_split_defined=True,
    )

    return TaskInstanceSet(
        name="TicTacToe Procedural TaskSet",
        description="Procedurally generated TicTacToe tasks with varying starting positions.",
        instances=instances,
        split_info=split_info,
    )
211
+
212
+
213
# Expose the factory under the module-level name `taskset`. The alias is the
# async function itself, so callers must call and await it to get the set.
taskset = create_tictactoe_taskset
@@ -0,0 +1,393 @@
1
+ import pytest
2
+ import numpy as np
3
+ from uuid import uuid4
4
+
5
+ from synth_ai.environments.tasks.core import TaskInstance, Impetus, Intent
6
+ from synth_ai.environments.examples.tictactoe.engine import (
7
+ TicTacToeEngine,
8
+ TicTacToePublicState,
9
+ TicTacToePrivateState,
10
+ TicTacToeWinComponent,
11
+ TicTacToeDrawComponent,
12
+ TicTacToeIllegalMoveComponent,
13
+ COORD_TO_IDX,
14
+ IDX_TO_COORD,
15
+ WIN_PATTERNS,
16
+ PLAYER_MARKS,
17
+ MARK_TO_PLAYER,
18
+ )
19
+ from synth_ai.environments.examples.tictactoe.taskset import (
20
+ TicTacToeTaskInstance,
21
+ TicTacToeTaskInstanceMetadata,
22
+ )
23
+
24
+
25
@pytest.fixture
def simple_task_instance():
    """Task instance for a fresh game: X to play, no opening moves."""
    fresh_game_meta = TicTacToeTaskInstanceMetadata(
        starting_player="X",
        opening_moves=[],
        optimal_outcome="draw",
        position_complexity=0,
        shortest_win_length=5,
    )
    test_impetus = Impetus(instructions="Test TicTacToe game")
    test_intent = Intent(
        rubric={"goal": "Test game"},
        gold_trajectories=None,
        gold_state_diff={},
    )

    return TicTacToeTaskInstance(
        id=uuid4(),
        impetus=test_impetus,
        intent=test_intent,
        metadata=fresh_game_meta,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
44
+
45
+
46
@pytest.fixture
def task_with_premoves():
    """Task instance whose metadata lists two opening moves (A1, then B2)."""
    premove_meta = TicTacToeTaskInstanceMetadata(
        starting_player="O",
        opening_moves=["A1", "B2"],
        optimal_outcome="win",
        position_complexity=2,
        shortest_win_length=3,
    )
    test_impetus = Impetus(instructions="Test TicTacToe with premoves")
    test_intent = Intent(
        rubric={"goal": "Test game"},
        gold_trajectories=None,
        gold_state_diff={},
    )

    return TicTacToeTaskInstance(
        id=uuid4(),
        impetus=test_impetus,
        intent=test_intent,
        metadata=premove_meta,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
65
+
66
+
67
+ class TestTicTacToeEngine:
68
+ @pytest.mark.asyncio
69
+ async def test_engine_initialization(self, simple_task_instance):
70
+ """Test engine initializes correctly."""
71
+ engine = TicTacToeEngine(simple_task_instance)
72
+
73
+ assert engine.current_player == "X"
74
+ assert engine.move_count == 0
75
+ assert engine.winner is None
76
+ assert not engine.terminated
77
+ assert engine.total_reward == 0.0
78
+ assert np.all(engine.board == 0)
79
+
80
+ @pytest.mark.asyncio
81
+ async def test_engine_with_premoves(self, task_with_premoves):
82
+ """Test engine applies pre-moves correctly."""
83
+ engine = TicTacToeEngine(task_with_premoves)
84
+
85
+ # Check pre-moves were applied
86
+ assert engine.board[COORD_TO_IDX["A1"]] == PLAYER_MARKS["X"]
87
+ assert engine.board[COORD_TO_IDX["B2"]] == PLAYER_MARKS["O"]
88
+ assert engine.current_player == "X" # After 2 moves, back to X
89
+ assert engine.move_count == 2
90
+
91
+ @pytest.mark.asyncio
92
+ async def test_reset_engine(self, simple_task_instance):
93
+ """Test engine reset functionality."""
94
+ engine = TicTacToeEngine(simple_task_instance)
95
+
96
+ # Make a move first
97
+ await engine._step_engine("B2")
98
+
99
+ # Reset
100
+ priv, pub = await engine._reset_engine()
101
+
102
+ assert pub.current_player == "X"
103
+ assert pub.move_count == 0
104
+ assert pub.winner is None
105
+ assert not pub.terminated
106
+ assert np.all(pub.board == 0)
107
+ assert priv.total_reward == 0.0
108
+
109
+ @pytest.mark.asyncio
110
+ async def test_valid_moves(self, simple_task_instance):
111
+ """Test making valid moves."""
112
+ engine = TicTacToeEngine(simple_task_instance)
113
+
114
+ # Make first move
115
+ priv, pub = await engine._step_engine("B2")
116
+
117
+ assert pub.last_move == "B2"
118
+ assert pub.board[COORD_TO_IDX["B2"]] == PLAYER_MARKS["X"]
119
+ assert pub.current_player == "O"
120
+ assert pub.move_count == 1
121
+ assert not pub.terminated
122
+
123
+ # Make second move
124
+ priv, pub = await engine._step_engine("A1")
125
+
126
+ assert pub.last_move == "A1"
127
+ assert pub.board[COORD_TO_IDX["A1"]] == PLAYER_MARKS["O"]
128
+ assert pub.current_player == "X"
129
+ assert pub.move_count == 2
130
+
131
+ @pytest.mark.asyncio
132
+ async def test_invalid_moves(self, simple_task_instance):
133
+ """Test handling of invalid moves."""
134
+ engine = TicTacToeEngine(simple_task_instance)
135
+
136
+ # Make a valid move first
137
+ await engine._step_engine("B2")
138
+
139
+ # Try to make move in occupied cell
140
+ priv, pub = await engine._step_engine("B2")
141
+
142
+ assert pub.terminated
143
+ assert priv.reward_last == -1.0 # Illegal move penalty
144
+
145
+ # Test invalid coordinate
146
+ engine = TicTacToeEngine(simple_task_instance)
147
+ priv, pub = await engine._step_engine("Z9")
148
+
149
+ assert pub.terminated
150
+ assert priv.reward_last == -1.0
151
+
152
@pytest.mark.asyncio
async def test_win_detection_row(self, simple_task_instance):
    """Check that X completing the A1-A2-A3 line is reported as a win.

    X plays A1, A2 and A3 while O answers with B1 and B2. After X's third
    move the public state must name X as winner and be terminated, and the
    private state must carry a reward of 1.0.
    """
    game = TicTacToeEngine(simple_task_instance)

    # Alternating opening: X takes A1 and A2, O takes B1 and B2.
    for coord in ("A1", "B1", "A2", "B2"):
        await game._step_engine(coord)

    # X's third mark completes the line.
    private_state, public_state = await game._step_engine("A3")

    assert public_state.winner == "X"
    assert public_state.terminated
    assert private_state.reward_last == 1.0  # reward for winning
167
+
168
@pytest.mark.asyncio
async def test_win_detection_column(self, simple_task_instance):
    """Check that X completing the A1-B1-C1 line is reported as a win.

    X plays A1, B1 and C1 while O answers with A2 and B2. After X's third
    move the public state must name X as winner and be terminated, and the
    private state must carry the win reward of 1.0.
    """
    engine = TicTacToeEngine(simple_task_instance)

    # Alternating opening: X takes A1 and B1, O takes A2 and B2.
    for move in ("A1", "A2", "B1", "B2"):
        await engine._step_engine(move)

    # X's third mark completes the line.
    priv, pub = await engine._step_engine("C1")

    assert pub.winner == "X"
    assert pub.terminated
    # The private state was previously fetched but never checked; a win
    # along this line must pay the same reward as any other win.
    assert priv.reward_last == 1.0
182
+
183
@pytest.mark.asyncio
async def test_win_detection_diagonal(self, simple_task_instance):
    """Check that X completing the A1-B2-C3 diagonal is reported as a win.

    X plays A1, B2 and C3 while O answers with A2 and B1. After X's third
    move the public state must name X as winner and be terminated, and the
    private state must carry the win reward of 1.0.
    """
    engine = TicTacToeEngine(simple_task_instance)

    # Alternating opening: X takes A1 and B2, O takes A2 and B1.
    for move in ("A1", "A2", "B2", "B1"):
        await engine._step_engine(move)

    # X's third mark completes the diagonal.
    priv, pub = await engine._step_engine("C3")

    assert pub.winner == "X"
    assert pub.terminated
    # The private state was previously fetched but never checked; a
    # diagonal win must pay the same reward as any other win.
    assert priv.reward_last == 1.0
197
+
198
@pytest.mark.asyncio
async def test_draw_detection(self, simple_task_instance):
    """Play nine moves that fill the board and check the result is a draw.

    The final state must report winner "draw", be terminated, count nine
    moves and carry a last reward of 0.0.
    """
    game = TicTacToeEngine(simple_task_instance)

    # Full-board sequence; only the states returned by the last step are
    # inspected below.
    full_board_sequence = ["A1", "B2", "A2", "A3", "B3", "B1", "C1", "C3", "C2"]
    for coord in full_board_sequence:
        private_state, public_state = await game._step_engine(coord)

    assert public_state.winner == "draw"
    assert public_state.terminated
    assert public_state.move_count == 9
    assert private_state.reward_last == 0.0  # reward for a draw
212
+
213
@pytest.mark.asyncio
async def test_board_text_representation(self, simple_task_instance):
    """Check the text rendering of the board after B2 and A1 are played.

    The rendering is read from the public state returned by
    ``get_current_states_for_observation`` and must contain the column
    header plus the expected start of each numbered row.
    """
    game = TicTacToeEngine(simple_task_instance)

    for coord in ("B2", "A1"):
        await game._step_engine(coord)

    # Only the public half of the observation pair is needed here.
    _, public_state = game.get_current_states_for_observation()
    rendered = public_state.board_text

    # Header line, then the beginning of rows 1, 2 and 3.
    expected_fragments = (" A B C", "1 O ", "2 X ", "3 ")
    for fragment in expected_fragments:
        assert fragment in rendered
228
+
229
@pytest.mark.asyncio
async def test_serialization(self, simple_task_instance):
    """Round-trip an engine through serialize/deserialize after two moves.

    The snapshot taken after B2 and A1 must record X to move, two moves
    played and A1 as the last move. The engine rebuilt from that snapshot
    must match the original on current player, move count and board.
    """
    original = TicTacToeEngine(simple_task_instance)

    # Put the engine into a non-initial state before snapshotting.
    for coord in ("B2", "A1"):
        await original._step_engine(coord)

    snapshot = await original._serialize_engine()
    saved = snapshot.engine_snapshot

    assert saved["current_player"] == "X"
    assert saved["move_count"] == 2
    assert saved["last_move"] == "A1"

    # Rebuild an engine from the snapshot and compare it field by field.
    rebuilt = await TicTacToeEngine._deserialize_engine(snapshot)

    assert rebuilt.current_player == original.current_player
    assert rebuilt.move_count == original.move_count
    assert np.array_equal(rebuilt.board, original.board)
251
+
252
@pytest.mark.asyncio
async def test_state_diff(self, simple_task_instance):
    """Check ``diff`` on the public and private states around one move.

    The states returned by a reset are compared with the states returned
    by stepping to B2. The public diff must list the board, current
    player, last move and move count. The private diff is checked after
    forcing ``reward_last`` to 1.0 on the later state.
    """
    game = TicTacToeEngine(simple_task_instance)

    private_before, public_before = await game._reset_engine()
    private_after, public_after = await game._step_engine("B2")

    # Public side: every field touched by the move should show up.
    public_changes = public_after.diff(public_before)
    for field_name in ("board", "current_player", "last_move", "move_count"):
        assert field_name in public_changes

    # Private side: reward_last may be 0.0 in both states, in which case it
    # would be absent from this first diff, so its result is not inspected.
    private_changes = private_after.diff(private_before)

    # Force a difference in reward_last and confirm the diff reports it.
    private_after.reward_last = 1.0
    private_changes = private_after.diff(private_before)
    assert "reward_last" in private_changes
274
+
275
+
276
class TestRewardComponents:
    """Tests for the win, draw and illegal-move reward components.

    Each test builds a ``TicTacToePublicState`` by hand and checks the value
    returned by the component's ``score`` coroutine.
    """

    @pytest.mark.asyncio
    async def test_win_component(self):
        """Score a component tracking X against X-wins, O-wins and no winner."""
        win_component = TicTacToeWinComponent(player_mark="X")

        # Start from a finished game that X has won.
        public_state = TicTacToePublicState(
            board=np.zeros(9),
            current_player="O",
            last_move="A3",
            winner="X",
            move_count=5,
            max_moves=9,
            terminated=True,
        )
        result = await win_component.score(public_state, "A3")
        assert result == 1.0

        # Re-use the same state, changing only the winner: a win for O
        # scores -1.0 and an undecided game scores 0.0.
        for winner, expected in (("O", -1.0), (None, 0.0)):
            public_state.winner = winner
            result = await win_component.score(public_state, "A3")
            assert result == expected

    @pytest.mark.asyncio
    async def test_draw_component(self):
        """Score the draw component on a drawn game and on a game won by X."""
        draw_component = TicTacToeDrawComponent()

        public_state = TicTacToePublicState(
            board=np.ones(9),
            current_player="X",
            last_move="C3",
            winner="draw",
            move_count=9,
            max_moves=9,
            terminated=True,
        )

        # A draw is expected to score 0.0.
        result = await draw_component.score(public_state, "C3")
        assert result == 0.0

        # A game that is not a draw is expected to score 0.0 as well.
        public_state.winner = "X"
        result = await draw_component.score(public_state, "C3")
        assert result == 0.0

    @pytest.mark.asyncio
    async def test_illegal_move_component(self):
        """Score the illegal-move component with its flag unset, then set."""
        illegal_component = TicTacToeIllegalMoveComponent()

        public_state = TicTacToePublicState(
            board=np.zeros(9),
            current_player="X",
            last_move="A1",
            winner=None,
            move_count=1,
            max_moves=9,
            terminated=False,
        )

        # With the flag left as constructed, no penalty is expected.
        result = await illegal_component.score(public_state, "A1")
        assert result == 0.0

        # Raise the flag by hand: the next score must be the penalty, and
        # scoring must leave the flag falsy again.
        illegal_component.illegal_move_attempted = True
        result = await illegal_component.score(public_state, "A1")
        assert result == -1.0
        assert not illegal_component.illegal_move_attempted
352
+
353
+
354
class TestConstants:
    """Sanity checks on the module-level lookup tables used by the game."""

    def test_coordinate_mappings(self):
        """Check the coordinate/index tables have nine entries and agree."""
        assert len(COORD_TO_IDX) == 9
        assert len(IDX_TO_COORD) == 9

        # Every forward mapping must be undone by the reverse table.
        assert all(
            IDX_TO_COORD[index] == coord for coord, index in COORD_TO_IDX.items()
        )

        # Spot-check the first, middle and last cells.
        assert COORD_TO_IDX["A1"] == 0
        assert COORD_TO_IDX["B2"] == 4
        assert COORD_TO_IDX["C3"] == 8

    def test_win_patterns(self):
        """Check WIN_PATTERNS holds exactly the eight expected index triples."""
        assert len(WIN_PATTERNS) == 8  # 3 rows + 3 columns + 2 diagonals

        row_lines = ([0, 1, 2], [3, 4, 5], [6, 7, 8])
        column_lines = ([0, 3, 6], [1, 4, 7], [2, 5, 8])
        diagonal_lines = ([0, 4, 8], [2, 4, 6])

        for line in (*row_lines, *column_lines, *diagonal_lines):
            assert line in WIN_PATTERNS

    def test_player_mappings(self):
        """Check the player-to-mark table and the mark-to-symbol table."""
        for player, mark in (("X", 1), ("O", 2)):
            assert PLAYER_MARKS[player] == mark

        # Mark 0 is rendered as a blank, 1 as X and 2 as O.
        for mark, symbol in ((0, " "), (1, "X"), (2, "O")):
            assert MARK_TO_PLAYER[mark] == symbol