synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (226)
  1. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
  2. examples/swe/task_app/grpo_swe_mini.py +55 -26
  3. examples/swe/task_app/hosted/rollout.py +40 -0
  4. examples/swe/task_app/hosted/test_service.py +5 -6
  5. examples/task_apps/TESTING.md +275 -0
  6. examples/task_apps/__init__.py +0 -0
  7. examples/task_apps/crafter/__init__.py +0 -0
  8. examples/task_apps/crafter/task_app/__init__.py +2 -0
  9. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
  10. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  11. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  12. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
  13. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
  14. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  15. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  16. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  17. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  18. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  19. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  20. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  21. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  22. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  71. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  72. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  73. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  74. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  75. examples/task_apps/enron/__init__.py +1 -0
  76. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  77. examples/task_apps/enron/task_app/README.md +14 -0
  78. examples/task_apps/enron/task_app/__init__.py +1 -0
  79. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  80. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  81. examples/task_apps/enron/tests/__init__.py +2 -0
  82. examples/task_apps/enron/tests/conftest.py +115 -0
  83. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  84. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  85. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  86. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  87. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  88. examples/task_apps/math/__init__.py +0 -0
  89. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  90. examples/task_apps/pokemon_battle/__init__.py +2 -0
  91. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  92. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  93. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  94. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  95. examples/task_apps/pokemon_red/README.md +357 -0
  96. examples/task_apps/pokemon_red/__init__.py +3 -0
  97. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  98. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  99. examples/task_apps/pokemon_red/task_app.py +606 -0
  100. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  101. examples/task_apps/sokoban/README.md +307 -0
  102. examples/task_apps/sokoban/__init__.py +3 -0
  103. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  104. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  105. examples/task_apps/sokoban/task_app.py +1058 -0
  106. examples/task_apps/sokoban/tests/__init__.py +2 -0
  107. examples/task_apps/sokoban/tests/conftest.py +113 -0
  108. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  109. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  110. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  111. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  112. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  113. examples/task_apps/verilog/__init__.py +1 -0
  114. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  115. examples/task_apps/verilog/task_app/README.md +12 -0
  116. examples/task_apps/verilog/task_app/__init__.py +1 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  118. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  119. examples/task_apps/verilog/tests/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/conftest.py +115 -0
  121. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  122. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  123. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  124. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  125. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  126. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  127. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  128. examples/workflows/__init__.py +0 -0
  129. examples/workflows/math_rl/__init__.py +0 -0
  130. examples/workflows/math_rl/download_dataset.py +80 -0
  131. synth_ai/__init__.py +2 -2
  132. synth_ai/api/train/builders.py +25 -11
  133. synth_ai/api/train/cli.py +12 -6
  134. synth_ai/api/train/configs/__init__.py +10 -10
  135. synth_ai/api/train/configs/rl.py +5 -4
  136. synth_ai/api/train/configs/sft.py +4 -3
  137. synth_ai/api/train/env_resolver.py +5 -2
  138. synth_ai/api/train/supported_algos.py +10 -5
  139. synth_ai/api/train/utils.py +7 -4
  140. synth_ai/cli/__init__.py +7 -51
  141. synth_ai/cli/_storage.py +4 -3
  142. synth_ai/cli/_validate_task_app.py +11 -0
  143. synth_ai/cli/balance.py +4 -3
  144. synth_ai/cli/calc.py +2 -2
  145. synth_ai/cli/demo.py +14 -7
  146. synth_ai/cli/legacy_root_backup.py +1 -1
  147. synth_ai/cli/rl_demo.py +8 -7
  148. synth_ai/cli/root.py +0 -97
  149. synth_ai/cli/task_apps.py +1707 -186
  150. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  151. synth_ai/environments/examples/enron/engine.py +7 -2
  152. synth_ai/environments/examples/enron/environment.py +68 -0
  153. synth_ai/environments/examples/red/engine.py +27 -0
  154. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  155. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  156. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  157. synth_ai/environments/examples/red/environment.py +60 -0
  158. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  159. synth_ai/environments/examples/verilog/engine.py +30 -4
  160. synth_ai/evals/client.py +58 -61
  161. synth_ai/jobs/client.py +16 -4
  162. synth_ai/judge_schemas.py +16 -16
  163. synth_ai/py.typed +0 -0
  164. synth_ai/task/__init__.py +14 -5
  165. synth_ai/task/contracts.py +124 -38
  166. synth_ai/task/proxy.py +48 -56
  167. synth_ai/task/rubrics/__init__.py +53 -0
  168. synth_ai/task/rubrics/loaders.py +133 -0
  169. synth_ai/task/rubrics/models.py +57 -0
  170. synth_ai/task/rubrics/scoring.py +113 -0
  171. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  172. synth_ai/task/server.py +8 -7
  173. synth_ai/task/validators.py +269 -6
  174. synth_ai/tracing_v3/decorators.py +7 -3
  175. synth_ai/tracing_v3/replica_sync.py +4 -4
  176. synth_ai/tracing_v3/serialization.py +5 -5
  177. synth_ai/tracing_v3/trace_utils.py +317 -0
  178. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  179. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  180. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
  181. examples/agora_ex/README_MoE.md +0 -224
  182. examples/agora_ex/__init__.py +0 -7
  183. examples/agora_ex/agora_ex.py +0 -65
  184. examples/agora_ex/agora_ex_task_app.py +0 -590
  185. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  186. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  187. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  188. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  189. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  190. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  191. synth_ai/rubrics/__init__.py +0 -22
  192. synth_ai/task/rubrics.py +0 -219
  193. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  194. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  195. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  196. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  197. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  214. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  215. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  216. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  217. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  218. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  219. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  222. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  223. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  224. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
  225. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  226. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Crafter task app.
2
2
 
3
3
  This module now delegates to the TaskAppConfig defined in the local example at
4
- `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
4
+ `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling).
6
6
  Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
7
7
  """
@@ -21,25 +21,37 @@ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
21
21
 
22
22
 
23
23
  def _load_build_config():
24
- # Find synth_ai package location to locate examples/
25
- import synth_ai
24
+ """Load the example's build_config, preferring package import with file fallback."""
25
+ # First try to import by package name (installed 'examples' package)
26
+ try:
27
+ module = importlib.import_module("examples.task_apps.crafter.task_app.grpo_crafter")
28
+ return module.build_config # type: ignore[attr-defined]
29
+ except Exception:
30
+ # Fallback: locate the file within the installed synth_ai distribution and exec it
31
+ import sys as _sys
32
+
33
+ import synth_ai
34
+
35
+ synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
36
+ module_path = (
37
+ synth_ai_path / "examples" / "task_apps" / "crafter" / "task_app" / "grpo_crafter.py"
38
+ )
26
39
 
27
- synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
28
- module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
40
+ if not module_path.exists():
41
+ raise ImportError(
42
+ f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
43
+ ) from None
29
44
 
30
- if not module_path.exists():
31
- raise ImportError(
32
- f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
45
+ spec = importlib.util.spec_from_file_location(
46
+ "examples.task_apps.crafter.task_app.grpo_crafter", module_path
33
47
  )
48
+ if spec is None or spec.loader is None:
49
+ raise ImportError(f"Could not load task app module at {module_path}") from None
34
50
 
35
- spec = importlib.util.spec_from_file_location(
36
- "warming_up_to_rl.task_app.grpo_crafter", module_path
37
- )
38
- if spec is None or spec.loader is None:
39
- raise ImportError(f"Could not load task app module at {module_path}")
40
- module = importlib.util.module_from_spec(spec)
41
- spec.loader.exec_module(module)
42
- return module.build_config
51
+ module = importlib.util.module_from_spec(spec)
52
+ _sys.modules[spec.name] = module
53
+ spec.loader.exec_module(module)
54
+ return module.build_config # type: ignore[attr-defined]
43
55
 
44
56
 
45
57
  build_config = _load_build_config()
@@ -26,7 +26,10 @@ from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
26
26
 
27
27
  # SQLite-backed helpers
28
28
  from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngineSnapshot
29
- from synth_ai.zyk import LM # Import LM class
29
+ try: # pragma: no cover - optional dependency
30
+ from synth_ai.zyk import LM # type: ignore
31
+ except ImportError: # pragma: no cover - fallback when LM unavailable
32
+ LM = None
30
33
 
31
34
  # --------------------------------------------------------------------------- actions
32
35
  ACTION_SEARCH = "search"
@@ -244,7 +247,9 @@ class EnronEngine(StatefulEngine):
244
247
  async def determine_if_answer_is_correct(
245
248
  question: str, gold_answer: str, agent_answer: str
246
249
  ) -> bool:
247
- # Instantiate LM for the judge
250
+ if LM is None:
251
+ return gold_answer.strip().lower() == agent_answer.strip().lower()
252
+
248
253
  llm = LM(model_name="gpt-4.1-nano", formatting_model_name="gpt-4.1-nano", temperature=0.0)
249
254
 
250
255
  system_prompt = (
@@ -9,6 +9,7 @@ from synth_ai.environments.environment.shared_engine import (
9
9
  InternalObservation,
10
10
  )
11
11
  from synth_ai.environments.environment.tools import (
12
+ AbstractTool,
12
13
  TOOL_REGISTRY,
13
14
  EnvToolCall,
14
15
  ToolResult,
@@ -65,6 +66,73 @@ class Terminate(EnvToolCall):
65
66
  self.action = (ACTION_ANSWER, "")
66
67
 
67
68
 
69
+ class TerminateArgs(BaseModel):
70
+ pass
71
+
72
+
73
+ class SearchEmailsTool(AbstractTool):
74
+ name = "search_emails"
75
+ call_schema = SearchEmailsArgs
76
+
77
+ def __init__(self, engine: EnronEngine):
78
+ self.engine = engine
79
+
80
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
81
+ try:
82
+ args = self.call_schema.model_validate(call.args or {})
83
+ results = await self.engine.search_emails_action(args.model_dump())
84
+ return ToolResult(ok=True, payload={"search_results": results})
85
+ except Exception as exc: # pragma: no cover - runtime safety
86
+ return ToolResult(ok=False, error=str(exc))
87
+
88
+
89
+ class ReadEmailTool(AbstractTool):
90
+ name = "read_email"
91
+ call_schema = ReadEmailArgs
92
+
93
+ def __init__(self, engine: EnronEngine):
94
+ self.engine = engine
95
+
96
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
97
+ try:
98
+ args = self.call_schema.model_validate(call.args or {})
99
+ email = await self.engine.read_email_action(args.message_id)
100
+ return ToolResult(ok=True, payload={"email": email})
101
+ except Exception as exc: # pragma: no cover
102
+ return ToolResult(ok=False, error=str(exc))
103
+
104
+
105
+ class AnswerQuestionTool(AbstractTool):
106
+ name = "answer_question"
107
+ call_schema = AnswerQuestionArgs
108
+
109
+ def __init__(self, engine: EnronEngine):
110
+ self.engine = engine
111
+
112
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
113
+ try:
114
+ args = self.call_schema.model_validate(call.args or {})
115
+ await self.engine.answer_question_action(args.answer)
116
+ return ToolResult(ok=True, payload={"status": "answer_recorded"})
117
+ except Exception as exc: # pragma: no cover
118
+ return ToolResult(ok=False, error=str(exc))
119
+
120
+
121
+ class TerminateTool(AbstractTool):
122
+ name = "terminate"
123
+ call_schema = TerminateArgs
124
+
125
+ def __init__(self, engine: EnronEngine):
126
+ self.engine = engine
127
+
128
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
129
+ try:
130
+ await self.engine.answer_question_action("")
131
+ return ToolResult(ok=True, payload={"status": "terminated"})
132
+ except Exception as exc: # pragma: no cover
133
+ return ToolResult(ok=False, error=str(exc))
134
+
135
+
68
136
  # -------- observation callable (optional for formatted observations)
69
137
  class SynthEnronObservationCallable(GetObservationCallable):
70
138
  async def get_observation(
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  from dataclasses import dataclass
5
+ import os
5
6
  from pathlib import Path
6
7
  from typing import Any, Dict, List, Optional
7
8
 
@@ -122,6 +123,13 @@ class GameSystemState:
122
123
  menu_state: int
123
124
  text_box_active: bool
124
125
  warp_flag: int
126
+ # Battle-specific data
127
+ enemy_hp_current: int = 0
128
+ enemy_hp_max: int = 0
129
+ enemy_hp_percentage: float = 0.0
130
+ enemy_level: int = 0
131
+ enemy_species_id: int = 0
132
+ battle_turn: int = 0
125
133
  # TODO: Add when available
126
134
  # current_menu_type: str = ""
127
135
  # dialogue_speaker: str = ""
@@ -278,12 +286,21 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
278
286
 
279
287
  def _get_rom_path(self) -> Path:
280
288
  """Get path to Pokemon Red ROM file"""
289
+ # Highest priority: explicit environment variable
290
+ env_rom = os.getenv("POKEMON_RED_ROM")
291
+ if env_rom:
292
+ p = Path(env_rom).expanduser()
293
+ if p.exists():
294
+ return p
295
+
281
296
  # Check several possible locations
282
297
  possible_paths = [
283
298
  Path(__file__).parent / "roms" / "pokemon_red.gb",
284
299
  Path(__file__).parent / "roms" / "PokemonRed.gb",
285
300
  Path(__file__).parent / "vendor" / "pokemon_red.gb",
286
301
  Path.home() / "Games" / "pokemon_red.gb",
302
+ # Common example location where users may drop the ROM
303
+ Path(__file__).resolve().parents[5] / "examples" / "task_apps" / "pokemon_red" / "Pokemon - Red Version (USA, Europe) (SGB Enhanced).gb",
287
304
  ]
288
305
 
289
306
  for path in possible_paths:
@@ -533,6 +550,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
533
550
  menu_state=int(current_state.get("menu_state", 0)),
534
551
  text_box_active=bool(current_state.get("text_box_active", False)),
535
552
  warp_flag=int(current_state.get("warp_flag", 0)),
553
+ enemy_hp_current=int(current_state.get("enemy_hp_current", 0)),
554
+ enemy_hp_max=int(current_state.get("enemy_hp_max", 0)),
555
+ enemy_hp_percentage=float(current_state.get("enemy_hp_percentage", 0.0)),
556
+ enemy_level=int(current_state.get("enemy_level", 0)),
557
+ enemy_species_id=int(current_state.get("enemy_species_id", 0)),
558
+ battle_turn=int(current_state.get("battle_turn", 0)),
536
559
  ),
537
560
  )
538
561
 
@@ -613,6 +636,10 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
613
636
  "prev_in_battle": bool(prev_state.get("in_battle", False)),
614
637
  "prev_party_level": int(prev_state.get("party_level", 0)),
615
638
  "prev_party_xp": int(prev_state.get("party_xp", 0)),
639
+ "prev_party_count": int(prev_state.get("party_count", 0)),
640
+ "prev_text_box_active": bool(prev_state.get("text_box_active", False)),
641
+ "prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
642
+ "prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
616
643
  },
617
644
  )
618
645
  except Exception as e:
@@ -6,6 +6,13 @@ PLAYER_Y = 0xD361 # player Y coordinate
6
6
  IN_BATTLE_FLAG = 0xD057 # battle state flag
7
7
  BATTLE_OUTCOME = 0xD089 # 0=ongoing, 1=win, 2=lose
8
8
 
9
+ # Battle-specific data
10
+ ENEMY_HP_CURRENT = 0xCFE6 # enemy Pokemon current HP (2 bytes)
11
+ ENEMY_HP_MAX = 0xCFE8 # enemy Pokemon max HP (2 bytes)
12
+ ENEMY_LEVEL = 0xD127 # enemy Pokemon level
13
+ ENEMY_SPECIES = 0xCFE5 # enemy Pokemon species ID
14
+ BATTLE_TURN = 0xCC2F # current battle turn counter
15
+
9
16
  # Party Pokemon data (up to 6 Pokemon)
10
17
  PARTY_COUNT = 0xD163 # number of Pokemon in party (0-6)
11
18
  PARTY_SPECIES = 0xD164 # species of each Pokemon (6 bytes)