synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (229) hide show
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import argparse
4
3
  import contextlib
5
4
  import json
6
5
  import os
@@ -45,7 +44,7 @@ def _is_modal_public_url(u: str) -> bool:
45
44
  return False
46
45
 
47
46
 
48
- def cmd_setup(_args: argparse.Namespace) -> int:
47
+ def setup() -> int:
49
48
  # Change to demo directory if stored
50
49
  demo_dir = demo_core.load_demo_dir()
51
50
  if demo_dir and os.path.isdir(demo_dir):
@@ -760,7 +759,9 @@ def _ensure_task_app_ready(env: DemoEnv, synth_key: str, *, label: str) -> DemoE
760
759
  return updated_env
761
760
 
762
761
 
763
- def cmd_deploy(args: argparse.Namespace) -> int:
762
+ def deploy(
763
+ local: bool = False, app: str | None = None, name: str | None = None, script: str | None = None
764
+ ) -> int:
764
765
  # Change to demo directory if stored
765
766
  demo_dir = demo_core.load_demo_dir()
766
767
  if demo_dir and os.path.isdir(demo_dir):
@@ -774,7 +775,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
774
775
  url = ""
775
776
  app_name = env.task_app_name or ""
776
777
  try:
777
- if args.local:
778
+ if local:
778
779
  print("Starting local Task App…")
779
780
  import subprocess
780
781
 
@@ -798,7 +799,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
798
799
  time.sleep(1)
799
800
  else:
800
801
  # Auto-detect app path if not supplied; prompt interactively from discovered ASGI apps
801
- app_path = os.path.abspath(args.app) if args.app else None
802
+ app_path = os.path.abspath(app) if app else None
802
803
  if not app_path or not os.path.isfile(app_path):
803
804
  # First pass: look for known common filenames
804
805
  candidates = [
@@ -828,13 +829,13 @@ def cmd_deploy(args: argparse.Namespace) -> int:
828
829
  choice = 1
829
830
  choice = max(1, min(choice, len(found)))
830
831
  app_path = str(found[choice - 1].resolve())
831
- if not app_path and args.script:
832
+ if not app_path and script:
832
833
  # Legacy script fallback if user supplied --script explicitly
833
834
  from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
834
835
 
835
- url = modal_deploy(script_path=args.script, env_api_key=env.env_api_key)
836
- if args.name:
837
- app_name = args.name
836
+ url = modal_deploy(script_path=script, env_api_key=env.env_api_key)
837
+ if name:
838
+ app_name = name
838
839
  else:
839
840
  if not app_path:
840
841
  entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
@@ -845,7 +846,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
845
846
  raise FileNotFoundError(f"App file not found: {app_path}")
846
847
  # Surface the app path before asking for the name
847
848
  print(f"Using task app: {app_path}")
848
- existing_name = (args.name or env.task_app_name or "").strip()
849
+ existing_name = (name or env.task_app_name or "").strip()
849
850
  if not existing_name:
850
851
  existing_name = f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
851
852
  suggested_name = existing_name
@@ -1128,7 +1129,7 @@ def _ensure_modal_installed() -> None:
1128
1129
  print("\n You can deploy later after authenticating.\n")
1129
1130
 
1130
1131
 
1131
- def cmd_init(args: argparse.Namespace) -> int:
1132
+ def init(template: str | None = None, dest: str | None = None, force: bool = False) -> int:
1132
1133
  """Materialise a demo task app template into the current directory."""
1133
1134
 
1134
1135
  templates = list(list_demo_templates())
@@ -1137,37 +1138,44 @@ def cmd_init(args: argparse.Namespace) -> int:
1137
1138
  return 1
1138
1139
 
1139
1140
  selected: DemoTemplate | None = None
1140
- if args.template:
1141
- selected = get_demo_template(args.template)
1141
+ if template:
1142
+ selected = get_demo_template(template)
1142
1143
  if selected is None:
1143
1144
  available = ", ".join(t.template_id for t in templates)
1144
- print(f"Unknown template '{args.template}'. Available: {available}")
1145
+ print(f"Unknown template '{template}'. Available: {available}")
1145
1146
  return 1
1146
1147
  else:
1147
- print("Select a demo template:" + "\n")
1148
- for idx, template in enumerate(templates, start=1):
1149
- print(f" [{idx}] {template.name} ({template.template_id})")
1150
- print(f" {template.description}")
1151
- try:
1152
- choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
1153
- except Exception:
1154
- choice_raw = "1"
1155
- if not choice_raw.isdigit():
1156
- print("Selection must be a number.")
1157
- return 1
1158
- choice_idx = int(choice_raw)
1159
- if not 1 <= choice_idx <= len(templates):
1160
- print("Selection out of range.")
1161
- return 1
1162
- selected = templates[choice_idx - 1]
1148
+ if force:
1149
+ selected = templates[0]
1150
+ print(
1151
+ f"Using default template: {selected.name} ({selected.template_id}) "
1152
+ f"(pass --template to choose another)"
1153
+ )
1154
+ else:
1155
+ print("Select a demo template:" + "\n")
1156
+ for idx, tpl in enumerate(templates, start=1):
1157
+ print(f" [{idx}] {tpl.name} ({tpl.template_id})")
1158
+ print(f" {tpl.description}")
1159
+ try:
1160
+ choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
1161
+ except Exception:
1162
+ choice_raw = "1"
1163
+ if not choice_raw.isdigit():
1164
+ print("Selection must be a number.")
1165
+ return 1
1166
+ choice_idx = int(choice_raw)
1167
+ if not 1 <= choice_idx <= len(templates):
1168
+ print("Selection out of range.")
1169
+ return 1
1170
+ selected = templates[choice_idx - 1]
1163
1171
 
1164
1172
  assert selected is not None
1165
1173
 
1166
1174
  default_subdir = selected.default_subdir or selected.template_id
1167
1175
 
1168
1176
  # Check if default destination is already occupied and switch to local_demos/ if needed
1169
- if args.dest:
1170
- default_dest = Path(args.dest).expanduser().resolve()
1177
+ if dest:
1178
+ default_dest = Path(dest).expanduser().resolve()
1171
1179
  else:
1172
1180
  primary_dest = Path.cwd() / default_subdir
1173
1181
  if primary_dest.exists() and any(primary_dest.iterdir()):
@@ -1176,10 +1184,13 @@ def cmd_init(args: argparse.Namespace) -> int:
1176
1184
  else:
1177
1185
  default_dest = primary_dest.resolve()
1178
1186
 
1179
- try:
1180
- dest_input = input(f"Destination directory [{default_dest}]: ").strip()
1181
- except Exception:
1187
+ if force:
1182
1188
  dest_input = ""
1189
+ else:
1190
+ try:
1191
+ dest_input = input(f"Destination directory [{default_dest}]: ").strip()
1192
+ except Exception:
1193
+ dest_input = ""
1183
1194
  destination = Path(dest_input).expanduser().resolve() if dest_input else default_dest
1184
1195
 
1185
1196
  # Track whether we should skip individual file prompts (if we already cleared the directory)
@@ -1190,15 +1201,18 @@ def cmd_init(args: argparse.Namespace) -> int:
1190
1201
  print(f"Destination {destination} is a file. Provide a directory path.")
1191
1202
  return 1
1192
1203
  if any(destination.iterdir()):
1193
- try:
1194
- response = (
1195
- input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
1196
- .strip()
1197
- .lower()
1198
- )
1199
- except (EOFError, KeyboardInterrupt):
1200
- print("\nCancelled.")
1201
- return 1
1204
+ if force:
1205
+ response = "y"
1206
+ else:
1207
+ try:
1208
+ response = (
1209
+ input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
1210
+ .strip()
1211
+ .lower()
1212
+ )
1213
+ except (EOFError, KeyboardInterrupt):
1214
+ print("\nCancelled.")
1215
+ return 1
1202
1216
  if response not in ("y", "yes"):
1203
1217
  print("Cancelled. Choose another directory or delete the existing one.")
1204
1218
  return 1
@@ -1236,15 +1250,18 @@ def cmd_init(args: argparse.Namespace) -> int:
1236
1250
  # Handle directory copying
1237
1251
  if src_path.is_dir():
1238
1252
  if dest_path.exists() and not directory_cleared:
1239
- try:
1240
- response = (
1241
- input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
1242
- .strip()
1243
- .lower()
1244
- )
1245
- except (EOFError, KeyboardInterrupt):
1246
- print("\nCancelled.")
1247
- return 1
1253
+ if force:
1254
+ response = "y"
1255
+ else:
1256
+ try:
1257
+ response = (
1258
+ input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
1259
+ .strip()
1260
+ .lower()
1261
+ )
1262
+ except (EOFError, KeyboardInterrupt):
1263
+ print("\nCancelled.")
1264
+ return 1
1248
1265
  if response not in ("y", "yes"):
1249
1266
  print(f"Skipping {dest_path.name}")
1250
1267
  continue
@@ -1256,15 +1273,18 @@ def cmd_init(args: argparse.Namespace) -> int:
1256
1273
  # Handle file copying
1257
1274
  dest_path.parent.mkdir(parents=True, exist_ok=True)
1258
1275
  if dest_path.exists() and not directory_cleared:
1259
- try:
1260
- response = (
1261
- input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
1262
- .strip()
1263
- .lower()
1264
- )
1265
- except (EOFError, KeyboardInterrupt):
1266
- print("\nCancelled.")
1267
- return 1
1276
+ if force:
1277
+ response = "y"
1278
+ else:
1279
+ try:
1280
+ response = (
1281
+ input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
1282
+ .strip()
1283
+ .lower()
1284
+ )
1285
+ except (EOFError, KeyboardInterrupt):
1286
+ print("\nCancelled.")
1287
+ return 1
1268
1288
  if response not in ("y", "yes"):
1269
1289
  print(f"Skipping {dest_path.name}")
1270
1290
  continue
@@ -1280,11 +1300,14 @@ def cmd_init(args: argparse.Namespace) -> int:
1280
1300
  env_path = destination / ".env"
1281
1301
  should_write = True
1282
1302
  if env_path.exists() and not directory_cleared:
1283
- try:
1284
- response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
1285
- except (EOFError, KeyboardInterrupt):
1286
- print("\nCancelled.")
1287
- return 1
1303
+ if force:
1304
+ response = "y"
1305
+ else:
1306
+ try:
1307
+ response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
1308
+ except (EOFError, KeyboardInterrupt):
1309
+ print("\nCancelled.")
1310
+ return 1
1288
1311
  should_write = response in ("y", "yes")
1289
1312
  if should_write:
1290
1313
  _write_text(env_path, "\n".join(selected.env_lines) + "\n")
@@ -1296,13 +1319,16 @@ def cmd_init(args: argparse.Namespace) -> int:
1296
1319
  cfg_dst = (destination / selected.config_destination).resolve()
1297
1320
  should_copy = True
1298
1321
  if cfg_dst.exists() and not directory_cleared:
1299
- try:
1300
- response = (
1301
- input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
1302
- )
1303
- except (EOFError, KeyboardInterrupt):
1304
- print("\nCancelled.")
1305
- return 1
1322
+ if force:
1323
+ response = "y"
1324
+ else:
1325
+ try:
1326
+ response = (
1327
+ input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
1328
+ )
1329
+ except (EOFError, KeyboardInterrupt):
1330
+ print("\nCancelled.")
1331
+ return 1
1306
1332
  should_copy = response in ("y", "yes")
1307
1333
  if should_copy:
1308
1334
  cfg_dst.parent.mkdir(parents=True, exist_ok=True)
@@ -1388,7 +1414,14 @@ def _write_text(path: str, content: str) -> None:
1388
1414
  # Note: `prepare` command has been removed; configuration now prepares TOML
1389
1415
 
1390
1416
 
1391
- def cmd_run(args: argparse.Namespace) -> int:
1417
+ def run(
1418
+ config: str | None = None,
1419
+ batch_size: int | None = None,
1420
+ group_size: int | None = None,
1421
+ model: str | None = None,
1422
+ timeout: int = 600,
1423
+ dry_run: bool = False,
1424
+ ) -> int:
1392
1425
  # Change to demo directory if stored
1393
1426
  demo_dir = demo_core.load_demo_dir()
1394
1427
  if demo_dir and os.path.isdir(demo_dir):
@@ -1429,7 +1462,7 @@ def cmd_run(args: argparse.Namespace) -> int:
1429
1462
  import tomllib
1430
1463
 
1431
1464
  try:
1432
- cfg_path = _select_or_create_config(getattr(args, "config", None), env)
1465
+ cfg_path = _select_or_create_config(config, env)
1433
1466
  except FileNotFoundError as exc:
1434
1467
  print(exc)
1435
1468
  return 1
@@ -1451,12 +1484,12 @@ def cmd_run(args: argparse.Namespace) -> int:
1451
1484
  # Optional: TRAINER_START_URL passthrough if already set in environment
1452
1485
  run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
1453
1486
  # Forward convenience knobs
1454
- if args.batch_size is not None:
1455
- run_env["RL_BATCH_SIZE"] = str(int(args.batch_size))
1456
- if args.group_size is not None:
1457
- run_env["RL_GROUP_SIZE"] = str(int(args.group_size))
1458
- if args.model:
1459
- run_env["RL_MODEL"] = args.model
1487
+ if batch_size is not None:
1488
+ run_env["RL_BATCH_SIZE"] = str(int(batch_size))
1489
+ if group_size is not None:
1490
+ run_env["RL_GROUP_SIZE"] = str(int(group_size))
1491
+ if model:
1492
+ run_env["RL_MODEL"] = model
1460
1493
  cmd = ["uv", "run", "python", launcher]
1461
1494
  print(f"Launching monorepo clustered runner: {' '.join(cmd)}")
1462
1495
  code = _popen_stream(cmd, env=run_env)
@@ -1484,11 +1517,11 @@ def cmd_run(args: argparse.Namespace) -> int:
1484
1517
  inline_cfg = tomllib.load(fh)
1485
1518
  with open(cfg_path) as fh2:
1486
1519
  toml_text = fh2.read()
1487
- if args.batch_size is not None:
1488
- inline_cfg.setdefault("training", {})["batch_size"] = int(args.batch_size)
1489
- if args.group_size is not None:
1490
- inline_cfg.setdefault("training", {})["group_size"] = int(args.group_size)
1491
- model_name = args.model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
1520
+ if batch_size is not None:
1521
+ inline_cfg.setdefault("training", {})["batch_size"] = int(batch_size)
1522
+ if group_size is not None:
1523
+ inline_cfg.setdefault("training", {})["group_size"] = int(group_size)
1524
+ model_name = model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
1492
1525
  api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
1493
1526
  # Print backend and key preview before request for clearer diagnostics
1494
1527
  try:
@@ -1678,79 +1711,8 @@ def cmd_run(args: argparse.Namespace) -> int:
1678
1711
  if name == "eval.reward_mean":
1679
1712
  print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
1680
1713
  break
1681
- if time.time() - start_t > (args.timeout or 600):
1714
+ if time.time() - start_t > (timeout or 600):
1682
1715
  print("Timeout waiting for terminal state.")
1683
1716
  break
1684
1717
  time.sleep(2)
1685
1718
  return 0
1686
-
1687
-
1688
- def main(argv: list[str] | None = None) -> int:
1689
- p = argparse.ArgumentParser(prog="synth-ai")
1690
- sub = p.add_subparsers(dest="cmd")
1691
-
1692
- def _add_parser(
1693
- names: list[str], *, configure: Callable[[argparse.ArgumentParser], None]
1694
- ) -> None:
1695
- for name in names:
1696
- parser = sub.add_parser(name)
1697
- configure(parser)
1698
-
1699
- _add_parser(
1700
- ["rl_demo.setup", "demo.setup"],
1701
- configure=lambda parser: parser.set_defaults(func=cmd_setup),
1702
- )
1703
-
1704
- def _init_opts(parser):
1705
- parser.add_argument("--template", type=str, default=None, help="Template id to instantiate")
1706
- parser.add_argument(
1707
- "--dest", type=str, default=None, help="Destination directory for files"
1708
- )
1709
- parser.set_defaults(func=cmd_init)
1710
-
1711
- _add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
1712
-
1713
- # (prepare command removed)
1714
-
1715
- def _deploy_opts(parser):
1716
- parser.add_argument(
1717
- "--local", action="store_true", help="Run local FastAPI instead of Modal deploy"
1718
- )
1719
- parser.add_argument(
1720
- "--app", type=str, default=None, help="Path to Modal app.py for uv run modal deploy"
1721
- )
1722
- parser.add_argument("--name", type=str, default=None, help="Modal app name")
1723
- parser.add_argument(
1724
- "--script", type=str, default=None, help="Path to deploy_task_app.sh (optional legacy)"
1725
- )
1726
- parser.set_defaults(func=cmd_deploy)
1727
-
1728
- _add_parser(["rl_demo.deploy", "demo.deploy"], configure=_deploy_opts)
1729
-
1730
- _add_parser(
1731
- ["rl_demo.configure", "demo.configure"],
1732
- configure=lambda parser: parser.set_defaults(func=cmd_run),
1733
- )
1734
-
1735
- def _run_opts(parser):
1736
- parser.add_argument(
1737
- "--config", type=str, default=None, help="Path to TOML config (skip prompt)"
1738
- )
1739
- parser.add_argument("--batch-size", type=int, default=None)
1740
- parser.add_argument("--group-size", type=int, default=None)
1741
- parser.add_argument("--model", type=str, default=None)
1742
- parser.add_argument("--timeout", type=int, default=600)
1743
- parser.add_argument("--dry-run", action="store_true", help="Print request body and exit")
1744
- parser.set_defaults(func=cmd_run)
1745
-
1746
- _add_parser(["run", "rl_demo.run", "demo.run"], configure=_run_opts)
1747
-
1748
- args = p.parse_args(argv)
1749
- if not hasattr(args, "func"):
1750
- p.print_help()
1751
- return 1
1752
- return int(args.func(args) or 0)
1753
-
1754
-
1755
- if __name__ == "__main__":
1756
- sys.exit(main())
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Crafter task app.
2
2
 
3
3
  This module now delegates to the TaskAppConfig defined in the local example at
4
- `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
4
+ `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling).
6
6
  Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
7
7
  """
@@ -21,25 +21,37 @@ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
21
21
 
22
22
 
23
23
  def _load_build_config():
24
- # Find synth_ai package location to locate examples/
25
- import synth_ai
24
+ """Load the example's build_config, preferring package import with file fallback."""
25
+ # First try to import by package name (installed 'examples' package)
26
+ try:
27
+ module = importlib.import_module("examples.task_apps.crafter.task_app.grpo_crafter")
28
+ return module.build_config # type: ignore[attr-defined]
29
+ except Exception:
30
+ # Fallback: locate the file within the installed synth_ai distribution and exec it
31
+ import sys as _sys
32
+
33
+ import synth_ai
34
+
35
+ synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
36
+ module_path = (
37
+ synth_ai_path / "examples" / "task_apps" / "crafter" / "task_app" / "grpo_crafter.py"
38
+ )
26
39
 
27
- synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
28
- module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
40
+ if not module_path.exists():
41
+ raise ImportError(
42
+ f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
43
+ ) from None
29
44
 
30
- if not module_path.exists():
31
- raise ImportError(
32
- f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
45
+ spec = importlib.util.spec_from_file_location(
46
+ "examples.task_apps.crafter.task_app.grpo_crafter", module_path
33
47
  )
48
+ if spec is None or spec.loader is None:
49
+ raise ImportError(f"Could not load task app module at {module_path}") from None
34
50
 
35
- spec = importlib.util.spec_from_file_location(
36
- "warming_up_to_rl.task_app.grpo_crafter", module_path
37
- )
38
- if spec is None or spec.loader is None:
39
- raise ImportError(f"Could not load task app module at {module_path}")
40
- module = importlib.util.module_from_spec(spec)
41
- spec.loader.exec_module(module)
42
- return module.build_config
51
+ module = importlib.util.module_from_spec(spec)
52
+ _sys.modules[spec.name] = module
53
+ spec.loader.exec_module(module)
54
+ return module.build_config # type: ignore[attr-defined]
43
55
 
44
56
 
45
57
  build_config = _load_build_config()
@@ -190,6 +190,22 @@ class SynthCrafterObservationCallable(GetObservationCallable):
190
190
  obs_dict["truncated"] = priv.truncated
191
191
  if pub.error_info:
192
192
  obs_dict["tool_error"] = pub.error_info
193
+ counts_payload = {}
194
+ try:
195
+ counts = getattr(priv, "achievements_current_values", {}) or {}
196
+ for k, v in counts.items():
197
+ try:
198
+ counts_payload[str(k)] = int(v)
199
+ except Exception:
200
+ try:
201
+ counts_payload[str(k)] = int(float(v))
202
+ except Exception:
203
+ continue
204
+ if counts_payload:
205
+ obs_dict["achievements_counts"] = counts_payload
206
+ except Exception:
207
+ # Best effort; omit counts if coercion fails
208
+ pass
193
209
 
194
210
  # Derive a simple local semantic patch around the player for easy rendering
195
211
  try:
@@ -26,7 +26,10 @@ from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
26
26
 
27
27
  # SQLite-backed helpers
28
28
  from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngineSnapshot
29
- from synth_ai.zyk import LM # Import LM class
29
+ try: # pragma: no cover - optional dependency
30
+ from synth_ai.zyk import LM # type: ignore
31
+ except ImportError: # pragma: no cover - fallback when LM unavailable
32
+ LM = None
30
33
 
31
34
  # --------------------------------------------------------------------------- actions
32
35
  ACTION_SEARCH = "search"
@@ -244,7 +247,9 @@ class EnronEngine(StatefulEngine):
244
247
  async def determine_if_answer_is_correct(
245
248
  question: str, gold_answer: str, agent_answer: str
246
249
  ) -> bool:
247
- # Instantiate LM for the judge
250
+ if LM is None:
251
+ return gold_answer.strip().lower() == agent_answer.strip().lower()
252
+
248
253
  llm = LM(model_name="gpt-4.1-nano", formatting_model_name="gpt-4.1-nano", temperature=0.0)
249
254
 
250
255
  system_prompt = (
@@ -9,6 +9,7 @@ from synth_ai.environments.environment.shared_engine import (
9
9
  InternalObservation,
10
10
  )
11
11
  from synth_ai.environments.environment.tools import (
12
+ AbstractTool,
12
13
  TOOL_REGISTRY,
13
14
  EnvToolCall,
14
15
  ToolResult,
@@ -65,6 +66,73 @@ class Terminate(EnvToolCall):
65
66
  self.action = (ACTION_ANSWER, "")
66
67
 
67
68
 
69
+ class TerminateArgs(BaseModel):
70
+ pass
71
+
72
+
73
+ class SearchEmailsTool(AbstractTool):
74
+ name = "search_emails"
75
+ call_schema = SearchEmailsArgs
76
+
77
+ def __init__(self, engine: EnronEngine):
78
+ self.engine = engine
79
+
80
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
81
+ try:
82
+ args = self.call_schema.model_validate(call.args or {})
83
+ results = await self.engine.search_emails_action(args.model_dump())
84
+ return ToolResult(ok=True, payload={"search_results": results})
85
+ except Exception as exc: # pragma: no cover - runtime safety
86
+ return ToolResult(ok=False, error=str(exc))
87
+
88
+
89
+ class ReadEmailTool(AbstractTool):
90
+ name = "read_email"
91
+ call_schema = ReadEmailArgs
92
+
93
+ def __init__(self, engine: EnronEngine):
94
+ self.engine = engine
95
+
96
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
97
+ try:
98
+ args = self.call_schema.model_validate(call.args or {})
99
+ email = await self.engine.read_email_action(args.message_id)
100
+ return ToolResult(ok=True, payload={"email": email})
101
+ except Exception as exc: # pragma: no cover
102
+ return ToolResult(ok=False, error=str(exc))
103
+
104
+
105
+ class AnswerQuestionTool(AbstractTool):
106
+ name = "answer_question"
107
+ call_schema = AnswerQuestionArgs
108
+
109
+ def __init__(self, engine: EnronEngine):
110
+ self.engine = engine
111
+
112
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
113
+ try:
114
+ args = self.call_schema.model_validate(call.args or {})
115
+ await self.engine.answer_question_action(args.answer)
116
+ return ToolResult(ok=True, payload={"status": "answer_recorded"})
117
+ except Exception as exc: # pragma: no cover
118
+ return ToolResult(ok=False, error=str(exc))
119
+
120
+
121
+ class TerminateTool(AbstractTool):
122
+ name = "terminate"
123
+ call_schema = TerminateArgs
124
+
125
+ def __init__(self, engine: EnronEngine):
126
+ self.engine = engine
127
+
128
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
129
+ try:
130
+ await self.engine.answer_question_action("")
131
+ return ToolResult(ok=True, payload={"status": "terminated"})
132
+ except Exception as exc: # pragma: no cover
133
+ return ToolResult(ok=False, error=str(exc))
134
+
135
+
68
136
  # -------- observation callable (optional for formatted observations)
69
137
  class SynthEnronObservationCallable(GetObservationCallable):
70
138
  async def get_observation(