synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +73 -29
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +134 -0
- synth_ai/api/train/configs/sft.py +95 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +49 -43
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +86 -106
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1710 -186
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +127 -0
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
synth_ai/demos/core/cli.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import argparse
|
|
4
3
|
import contextlib
|
|
5
4
|
import json
|
|
6
5
|
import os
|
|
@@ -45,7 +44,7 @@ def _is_modal_public_url(u: str) -> bool:
|
|
|
45
44
|
return False
|
|
46
45
|
|
|
47
46
|
|
|
48
|
-
def
|
|
47
|
+
def setup() -> int:
|
|
49
48
|
# Change to demo directory if stored
|
|
50
49
|
demo_dir = demo_core.load_demo_dir()
|
|
51
50
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -760,7 +759,9 @@ def _ensure_task_app_ready(env: DemoEnv, synth_key: str, *, label: str) -> DemoE
|
|
|
760
759
|
return updated_env
|
|
761
760
|
|
|
762
761
|
|
|
763
|
-
def
|
|
762
|
+
def deploy(
|
|
763
|
+
local: bool = False, app: str | None = None, name: str | None = None, script: str | None = None
|
|
764
|
+
) -> int:
|
|
764
765
|
# Change to demo directory if stored
|
|
765
766
|
demo_dir = demo_core.load_demo_dir()
|
|
766
767
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -774,7 +775,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
774
775
|
url = ""
|
|
775
776
|
app_name = env.task_app_name or ""
|
|
776
777
|
try:
|
|
777
|
-
if
|
|
778
|
+
if local:
|
|
778
779
|
print("Starting local Task App…")
|
|
779
780
|
import subprocess
|
|
780
781
|
|
|
@@ -798,7 +799,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
798
799
|
time.sleep(1)
|
|
799
800
|
else:
|
|
800
801
|
# Auto-detect app path if not supplied; prompt interactively from discovered ASGI apps
|
|
801
|
-
app_path = os.path.abspath(
|
|
802
|
+
app_path = os.path.abspath(app) if app else None
|
|
802
803
|
if not app_path or not os.path.isfile(app_path):
|
|
803
804
|
# First pass: look for known common filenames
|
|
804
805
|
candidates = [
|
|
@@ -828,13 +829,13 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
828
829
|
choice = 1
|
|
829
830
|
choice = max(1, min(choice, len(found)))
|
|
830
831
|
app_path = str(found[choice - 1].resolve())
|
|
831
|
-
if not app_path and
|
|
832
|
+
if not app_path and script:
|
|
832
833
|
# Legacy script fallback if user supplied --script explicitly
|
|
833
834
|
from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
|
|
834
835
|
|
|
835
|
-
url = modal_deploy(script_path=
|
|
836
|
-
if
|
|
837
|
-
app_name =
|
|
836
|
+
url = modal_deploy(script_path=script, env_api_key=env.env_api_key)
|
|
837
|
+
if name:
|
|
838
|
+
app_name = name
|
|
838
839
|
else:
|
|
839
840
|
if not app_path:
|
|
840
841
|
entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
|
|
@@ -845,7 +846,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
845
846
|
raise FileNotFoundError(f"App file not found: {app_path}")
|
|
846
847
|
# Surface the app path before asking for the name
|
|
847
848
|
print(f"Using task app: {app_path}")
|
|
848
|
-
existing_name = (
|
|
849
|
+
existing_name = (name or env.task_app_name or "").strip()
|
|
849
850
|
if not existing_name:
|
|
850
851
|
existing_name = f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
|
|
851
852
|
suggested_name = existing_name
|
|
@@ -1128,7 +1129,7 @@ def _ensure_modal_installed() -> None:
|
|
|
1128
1129
|
print("\n You can deploy later after authenticating.\n")
|
|
1129
1130
|
|
|
1130
1131
|
|
|
1131
|
-
def
|
|
1132
|
+
def init(template: str | None = None, dest: str | None = None, force: bool = False) -> int:
|
|
1132
1133
|
"""Materialise a demo task app template into the current directory."""
|
|
1133
1134
|
|
|
1134
1135
|
templates = list(list_demo_templates())
|
|
@@ -1137,37 +1138,44 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1137
1138
|
return 1
|
|
1138
1139
|
|
|
1139
1140
|
selected: DemoTemplate | None = None
|
|
1140
|
-
if
|
|
1141
|
-
selected = get_demo_template(
|
|
1141
|
+
if template:
|
|
1142
|
+
selected = get_demo_template(template)
|
|
1142
1143
|
if selected is None:
|
|
1143
1144
|
available = ", ".join(t.template_id for t in templates)
|
|
1144
|
-
print(f"Unknown template '{
|
|
1145
|
+
print(f"Unknown template '{template}'. Available: {available}")
|
|
1145
1146
|
return 1
|
|
1146
1147
|
else:
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
print(
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1148
|
+
if force:
|
|
1149
|
+
selected = templates[0]
|
|
1150
|
+
print(
|
|
1151
|
+
f"Using default template: {selected.name} ({selected.template_id}) "
|
|
1152
|
+
f"(pass --template to choose another)"
|
|
1153
|
+
)
|
|
1154
|
+
else:
|
|
1155
|
+
print("Select a demo template:" + "\n")
|
|
1156
|
+
for idx, tpl in enumerate(templates, start=1):
|
|
1157
|
+
print(f" [{idx}] {tpl.name} ({tpl.template_id})")
|
|
1158
|
+
print(f" {tpl.description}")
|
|
1159
|
+
try:
|
|
1160
|
+
choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
|
|
1161
|
+
except Exception:
|
|
1162
|
+
choice_raw = "1"
|
|
1163
|
+
if not choice_raw.isdigit():
|
|
1164
|
+
print("Selection must be a number.")
|
|
1165
|
+
return 1
|
|
1166
|
+
choice_idx = int(choice_raw)
|
|
1167
|
+
if not 1 <= choice_idx <= len(templates):
|
|
1168
|
+
print("Selection out of range.")
|
|
1169
|
+
return 1
|
|
1170
|
+
selected = templates[choice_idx - 1]
|
|
1163
1171
|
|
|
1164
1172
|
assert selected is not None
|
|
1165
1173
|
|
|
1166
1174
|
default_subdir = selected.default_subdir or selected.template_id
|
|
1167
1175
|
|
|
1168
1176
|
# Check if default destination is already occupied and switch to local_demos/ if needed
|
|
1169
|
-
if
|
|
1170
|
-
default_dest = Path(
|
|
1177
|
+
if dest:
|
|
1178
|
+
default_dest = Path(dest).expanduser().resolve()
|
|
1171
1179
|
else:
|
|
1172
1180
|
primary_dest = Path.cwd() / default_subdir
|
|
1173
1181
|
if primary_dest.exists() and any(primary_dest.iterdir()):
|
|
@@ -1176,10 +1184,13 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1176
1184
|
else:
|
|
1177
1185
|
default_dest = primary_dest.resolve()
|
|
1178
1186
|
|
|
1179
|
-
|
|
1180
|
-
dest_input = input(f"Destination directory [{default_dest}]: ").strip()
|
|
1181
|
-
except Exception:
|
|
1187
|
+
if force:
|
|
1182
1188
|
dest_input = ""
|
|
1189
|
+
else:
|
|
1190
|
+
try:
|
|
1191
|
+
dest_input = input(f"Destination directory [{default_dest}]: ").strip()
|
|
1192
|
+
except Exception:
|
|
1193
|
+
dest_input = ""
|
|
1183
1194
|
destination = Path(dest_input).expanduser().resolve() if dest_input else default_dest
|
|
1184
1195
|
|
|
1185
1196
|
# Track whether we should skip individual file prompts (if we already cleared the directory)
|
|
@@ -1190,15 +1201,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1190
1201
|
print(f"Destination {destination} is a file. Provide a directory path.")
|
|
1191
1202
|
return 1
|
|
1192
1203
|
if any(destination.iterdir()):
|
|
1193
|
-
|
|
1194
|
-
response =
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1204
|
+
if force:
|
|
1205
|
+
response = "y"
|
|
1206
|
+
else:
|
|
1207
|
+
try:
|
|
1208
|
+
response = (
|
|
1209
|
+
input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
|
|
1210
|
+
.strip()
|
|
1211
|
+
.lower()
|
|
1212
|
+
)
|
|
1213
|
+
except (EOFError, KeyboardInterrupt):
|
|
1214
|
+
print("\nCancelled.")
|
|
1215
|
+
return 1
|
|
1202
1216
|
if response not in ("y", "yes"):
|
|
1203
1217
|
print("Cancelled. Choose another directory or delete the existing one.")
|
|
1204
1218
|
return 1
|
|
@@ -1236,15 +1250,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1236
1250
|
# Handle directory copying
|
|
1237
1251
|
if src_path.is_dir():
|
|
1238
1252
|
if dest_path.exists() and not directory_cleared:
|
|
1239
|
-
|
|
1240
|
-
response =
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1253
|
+
if force:
|
|
1254
|
+
response = "y"
|
|
1255
|
+
else:
|
|
1256
|
+
try:
|
|
1257
|
+
response = (
|
|
1258
|
+
input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
|
|
1259
|
+
.strip()
|
|
1260
|
+
.lower()
|
|
1261
|
+
)
|
|
1262
|
+
except (EOFError, KeyboardInterrupt):
|
|
1263
|
+
print("\nCancelled.")
|
|
1264
|
+
return 1
|
|
1248
1265
|
if response not in ("y", "yes"):
|
|
1249
1266
|
print(f"Skipping {dest_path.name}")
|
|
1250
1267
|
continue
|
|
@@ -1256,15 +1273,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1256
1273
|
# Handle file copying
|
|
1257
1274
|
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1258
1275
|
if dest_path.exists() and not directory_cleared:
|
|
1259
|
-
|
|
1260
|
-
response =
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1276
|
+
if force:
|
|
1277
|
+
response = "y"
|
|
1278
|
+
else:
|
|
1279
|
+
try:
|
|
1280
|
+
response = (
|
|
1281
|
+
input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
|
|
1282
|
+
.strip()
|
|
1283
|
+
.lower()
|
|
1284
|
+
)
|
|
1285
|
+
except (EOFError, KeyboardInterrupt):
|
|
1286
|
+
print("\nCancelled.")
|
|
1287
|
+
return 1
|
|
1268
1288
|
if response not in ("y", "yes"):
|
|
1269
1289
|
print(f"Skipping {dest_path.name}")
|
|
1270
1290
|
continue
|
|
@@ -1280,11 +1300,14 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1280
1300
|
env_path = destination / ".env"
|
|
1281
1301
|
should_write = True
|
|
1282
1302
|
if env_path.exists() and not directory_cleared:
|
|
1283
|
-
|
|
1284
|
-
response =
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1303
|
+
if force:
|
|
1304
|
+
response = "y"
|
|
1305
|
+
else:
|
|
1306
|
+
try:
|
|
1307
|
+
response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
|
|
1308
|
+
except (EOFError, KeyboardInterrupt):
|
|
1309
|
+
print("\nCancelled.")
|
|
1310
|
+
return 1
|
|
1288
1311
|
should_write = response in ("y", "yes")
|
|
1289
1312
|
if should_write:
|
|
1290
1313
|
_write_text(env_path, "\n".join(selected.env_lines) + "\n")
|
|
@@ -1296,13 +1319,16 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1296
1319
|
cfg_dst = (destination / selected.config_destination).resolve()
|
|
1297
1320
|
should_copy = True
|
|
1298
1321
|
if cfg_dst.exists() and not directory_cleared:
|
|
1299
|
-
|
|
1300
|
-
response =
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1322
|
+
if force:
|
|
1323
|
+
response = "y"
|
|
1324
|
+
else:
|
|
1325
|
+
try:
|
|
1326
|
+
response = (
|
|
1327
|
+
input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
|
|
1328
|
+
)
|
|
1329
|
+
except (EOFError, KeyboardInterrupt):
|
|
1330
|
+
print("\nCancelled.")
|
|
1331
|
+
return 1
|
|
1306
1332
|
should_copy = response in ("y", "yes")
|
|
1307
1333
|
if should_copy:
|
|
1308
1334
|
cfg_dst.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -1388,7 +1414,14 @@ def _write_text(path: str, content: str) -> None:
|
|
|
1388
1414
|
# Note: `prepare` command has been removed; configuration now prepares TOML
|
|
1389
1415
|
|
|
1390
1416
|
|
|
1391
|
-
def
|
|
1417
|
+
def run(
|
|
1418
|
+
config: str | None = None,
|
|
1419
|
+
batch_size: int | None = None,
|
|
1420
|
+
group_size: int | None = None,
|
|
1421
|
+
model: str | None = None,
|
|
1422
|
+
timeout: int = 600,
|
|
1423
|
+
dry_run: bool = False,
|
|
1424
|
+
) -> int:
|
|
1392
1425
|
# Change to demo directory if stored
|
|
1393
1426
|
demo_dir = demo_core.load_demo_dir()
|
|
1394
1427
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -1429,7 +1462,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1429
1462
|
import tomllib
|
|
1430
1463
|
|
|
1431
1464
|
try:
|
|
1432
|
-
cfg_path = _select_or_create_config(
|
|
1465
|
+
cfg_path = _select_or_create_config(config, env)
|
|
1433
1466
|
except FileNotFoundError as exc:
|
|
1434
1467
|
print(exc)
|
|
1435
1468
|
return 1
|
|
@@ -1451,12 +1484,12 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1451
1484
|
# Optional: TRAINER_START_URL passthrough if already set in environment
|
|
1452
1485
|
run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
|
|
1453
1486
|
# Forward convenience knobs
|
|
1454
|
-
if
|
|
1455
|
-
run_env["RL_BATCH_SIZE"] = str(int(
|
|
1456
|
-
if
|
|
1457
|
-
run_env["RL_GROUP_SIZE"] = str(int(
|
|
1458
|
-
if
|
|
1459
|
-
run_env["RL_MODEL"] =
|
|
1487
|
+
if batch_size is not None:
|
|
1488
|
+
run_env["RL_BATCH_SIZE"] = str(int(batch_size))
|
|
1489
|
+
if group_size is not None:
|
|
1490
|
+
run_env["RL_GROUP_SIZE"] = str(int(group_size))
|
|
1491
|
+
if model:
|
|
1492
|
+
run_env["RL_MODEL"] = model
|
|
1460
1493
|
cmd = ["uv", "run", "python", launcher]
|
|
1461
1494
|
print(f"Launching monorepo clustered runner: {' '.join(cmd)}")
|
|
1462
1495
|
code = _popen_stream(cmd, env=run_env)
|
|
@@ -1484,11 +1517,11 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1484
1517
|
inline_cfg = tomllib.load(fh)
|
|
1485
1518
|
with open(cfg_path) as fh2:
|
|
1486
1519
|
toml_text = fh2.read()
|
|
1487
|
-
if
|
|
1488
|
-
inline_cfg.setdefault("training", {})["batch_size"] = int(
|
|
1489
|
-
if
|
|
1490
|
-
inline_cfg.setdefault("training", {})["group_size"] = int(
|
|
1491
|
-
model_name =
|
|
1520
|
+
if batch_size is not None:
|
|
1521
|
+
inline_cfg.setdefault("training", {})["batch_size"] = int(batch_size)
|
|
1522
|
+
if group_size is not None:
|
|
1523
|
+
inline_cfg.setdefault("training", {})["group_size"] = int(group_size)
|
|
1524
|
+
model_name = model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
|
|
1492
1525
|
api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
|
|
1493
1526
|
# Print backend and key preview before request for clearer diagnostics
|
|
1494
1527
|
try:
|
|
@@ -1678,79 +1711,8 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1678
1711
|
if name == "eval.reward_mean":
|
|
1679
1712
|
print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
|
|
1680
1713
|
break
|
|
1681
|
-
if time.time() - start_t > (
|
|
1714
|
+
if time.time() - start_t > (timeout or 600):
|
|
1682
1715
|
print("Timeout waiting for terminal state.")
|
|
1683
1716
|
break
|
|
1684
1717
|
time.sleep(2)
|
|
1685
1718
|
return 0
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
def main(argv: list[str] | None = None) -> int:
|
|
1689
|
-
p = argparse.ArgumentParser(prog="synth-ai")
|
|
1690
|
-
sub = p.add_subparsers(dest="cmd")
|
|
1691
|
-
|
|
1692
|
-
def _add_parser(
|
|
1693
|
-
names: list[str], *, configure: Callable[[argparse.ArgumentParser], None]
|
|
1694
|
-
) -> None:
|
|
1695
|
-
for name in names:
|
|
1696
|
-
parser = sub.add_parser(name)
|
|
1697
|
-
configure(parser)
|
|
1698
|
-
|
|
1699
|
-
_add_parser(
|
|
1700
|
-
["rl_demo.setup", "demo.setup"],
|
|
1701
|
-
configure=lambda parser: parser.set_defaults(func=cmd_setup),
|
|
1702
|
-
)
|
|
1703
|
-
|
|
1704
|
-
def _init_opts(parser):
|
|
1705
|
-
parser.add_argument("--template", type=str, default=None, help="Template id to instantiate")
|
|
1706
|
-
parser.add_argument(
|
|
1707
|
-
"--dest", type=str, default=None, help="Destination directory for files"
|
|
1708
|
-
)
|
|
1709
|
-
parser.set_defaults(func=cmd_init)
|
|
1710
|
-
|
|
1711
|
-
_add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
|
|
1712
|
-
|
|
1713
|
-
# (prepare command removed)
|
|
1714
|
-
|
|
1715
|
-
def _deploy_opts(parser):
|
|
1716
|
-
parser.add_argument(
|
|
1717
|
-
"--local", action="store_true", help="Run local FastAPI instead of Modal deploy"
|
|
1718
|
-
)
|
|
1719
|
-
parser.add_argument(
|
|
1720
|
-
"--app", type=str, default=None, help="Path to Modal app.py for uv run modal deploy"
|
|
1721
|
-
)
|
|
1722
|
-
parser.add_argument("--name", type=str, default=None, help="Modal app name")
|
|
1723
|
-
parser.add_argument(
|
|
1724
|
-
"--script", type=str, default=None, help="Path to deploy_task_app.sh (optional legacy)"
|
|
1725
|
-
)
|
|
1726
|
-
parser.set_defaults(func=cmd_deploy)
|
|
1727
|
-
|
|
1728
|
-
_add_parser(["rl_demo.deploy", "demo.deploy"], configure=_deploy_opts)
|
|
1729
|
-
|
|
1730
|
-
_add_parser(
|
|
1731
|
-
["rl_demo.configure", "demo.configure"],
|
|
1732
|
-
configure=lambda parser: parser.set_defaults(func=cmd_run),
|
|
1733
|
-
)
|
|
1734
|
-
|
|
1735
|
-
def _run_opts(parser):
|
|
1736
|
-
parser.add_argument(
|
|
1737
|
-
"--config", type=str, default=None, help="Path to TOML config (skip prompt)"
|
|
1738
|
-
)
|
|
1739
|
-
parser.add_argument("--batch-size", type=int, default=None)
|
|
1740
|
-
parser.add_argument("--group-size", type=int, default=None)
|
|
1741
|
-
parser.add_argument("--model", type=str, default=None)
|
|
1742
|
-
parser.add_argument("--timeout", type=int, default=600)
|
|
1743
|
-
parser.add_argument("--dry-run", action="store_true", help="Print request body and exit")
|
|
1744
|
-
parser.set_defaults(func=cmd_run)
|
|
1745
|
-
|
|
1746
|
-
_add_parser(["run", "rl_demo.run", "demo.run"], configure=_run_opts)
|
|
1747
|
-
|
|
1748
|
-
args = p.parse_args(argv)
|
|
1749
|
-
if not hasattr(args, "func"):
|
|
1750
|
-
p.print_help()
|
|
1751
|
-
return 1
|
|
1752
|
-
return int(args.func(args) or 0)
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
if __name__ == "__main__":
|
|
1756
|
-
sys.exit(main())
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Compatibility wrapper for the GRPO Crafter task app.
|
|
2
2
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
|
-
`examples/
|
|
4
|
+
`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
6
|
Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
@@ -21,25 +21,37 @@ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def _load_build_config():
|
|
24
|
-
|
|
25
|
-
import
|
|
24
|
+
"""Load the example's build_config, preferring package import with file fallback."""
|
|
25
|
+
# First try to import by package name (installed 'examples' package)
|
|
26
|
+
try:
|
|
27
|
+
module = importlib.import_module("examples.task_apps.crafter.task_app.grpo_crafter")
|
|
28
|
+
return module.build_config # type: ignore[attr-defined]
|
|
29
|
+
except Exception:
|
|
30
|
+
# Fallback: locate the file within the installed synth_ai distribution and exec it
|
|
31
|
+
import sys as _sys
|
|
32
|
+
|
|
33
|
+
import synth_ai
|
|
34
|
+
|
|
35
|
+
synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
|
|
36
|
+
module_path = (
|
|
37
|
+
synth_ai_path / "examples" / "task_apps" / "crafter" / "task_app" / "grpo_crafter.py"
|
|
38
|
+
)
|
|
26
39
|
|
|
27
|
-
|
|
28
|
-
|
|
40
|
+
if not module_path.exists():
|
|
41
|
+
raise ImportError(
|
|
42
|
+
f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
|
|
43
|
+
) from None
|
|
29
44
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
|
|
45
|
+
spec = importlib.util.spec_from_file_location(
|
|
46
|
+
"examples.task_apps.crafter.task_app.grpo_crafter", module_path
|
|
33
47
|
)
|
|
48
|
+
if spec is None or spec.loader is None:
|
|
49
|
+
raise ImportError(f"Could not load task app module at {module_path}") from None
|
|
34
50
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
raise ImportError(f"Could not load task app module at {module_path}")
|
|
40
|
-
module = importlib.util.module_from_spec(spec)
|
|
41
|
-
spec.loader.exec_module(module)
|
|
42
|
-
return module.build_config
|
|
51
|
+
module = importlib.util.module_from_spec(spec)
|
|
52
|
+
_sys.modules[spec.name] = module
|
|
53
|
+
spec.loader.exec_module(module)
|
|
54
|
+
return module.build_config # type: ignore[attr-defined]
|
|
43
55
|
|
|
44
56
|
|
|
45
57
|
build_config = _load_build_config()
|
|
@@ -190,6 +190,22 @@ class SynthCrafterObservationCallable(GetObservationCallable):
|
|
|
190
190
|
obs_dict["truncated"] = priv.truncated
|
|
191
191
|
if pub.error_info:
|
|
192
192
|
obs_dict["tool_error"] = pub.error_info
|
|
193
|
+
counts_payload = {}
|
|
194
|
+
try:
|
|
195
|
+
counts = getattr(priv, "achievements_current_values", {}) or {}
|
|
196
|
+
for k, v in counts.items():
|
|
197
|
+
try:
|
|
198
|
+
counts_payload[str(k)] = int(v)
|
|
199
|
+
except Exception:
|
|
200
|
+
try:
|
|
201
|
+
counts_payload[str(k)] = int(float(v))
|
|
202
|
+
except Exception:
|
|
203
|
+
continue
|
|
204
|
+
if counts_payload:
|
|
205
|
+
obs_dict["achievements_counts"] = counts_payload
|
|
206
|
+
except Exception:
|
|
207
|
+
# Best effort; omit counts if coercion fails
|
|
208
|
+
pass
|
|
193
209
|
|
|
194
210
|
# Derive a simple local semantic patch around the player for easy rendering
|
|
195
211
|
try:
|
|
@@ -26,7 +26,10 @@ from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
|
|
|
26
26
|
|
|
27
27
|
# SQLite-backed helpers
|
|
28
28
|
from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngineSnapshot
|
|
29
|
-
|
|
29
|
+
try: # pragma: no cover - optional dependency
|
|
30
|
+
from synth_ai.zyk import LM # type: ignore
|
|
31
|
+
except ImportError: # pragma: no cover - fallback when LM unavailable
|
|
32
|
+
LM = None
|
|
30
33
|
|
|
31
34
|
# --------------------------------------------------------------------------- actions
|
|
32
35
|
ACTION_SEARCH = "search"
|
|
@@ -244,7 +247,9 @@ class EnronEngine(StatefulEngine):
|
|
|
244
247
|
async def determine_if_answer_is_correct(
|
|
245
248
|
question: str, gold_answer: str, agent_answer: str
|
|
246
249
|
) -> bool:
|
|
247
|
-
|
|
250
|
+
if LM is None:
|
|
251
|
+
return gold_answer.strip().lower() == agent_answer.strip().lower()
|
|
252
|
+
|
|
248
253
|
llm = LM(model_name="gpt-4.1-nano", formatting_model_name="gpt-4.1-nano", temperature=0.0)
|
|
249
254
|
|
|
250
255
|
system_prompt = (
|
|
@@ -9,6 +9,7 @@ from synth_ai.environments.environment.shared_engine import (
|
|
|
9
9
|
InternalObservation,
|
|
10
10
|
)
|
|
11
11
|
from synth_ai.environments.environment.tools import (
|
|
12
|
+
AbstractTool,
|
|
12
13
|
TOOL_REGISTRY,
|
|
13
14
|
EnvToolCall,
|
|
14
15
|
ToolResult,
|
|
@@ -65,6 +66,73 @@ class Terminate(EnvToolCall):
|
|
|
65
66
|
self.action = (ACTION_ANSWER, "")
|
|
66
67
|
|
|
67
68
|
|
|
69
|
+
class TerminateArgs(BaseModel):
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class SearchEmailsTool(AbstractTool):
|
|
74
|
+
name = "search_emails"
|
|
75
|
+
call_schema = SearchEmailsArgs
|
|
76
|
+
|
|
77
|
+
def __init__(self, engine: EnronEngine):
|
|
78
|
+
self.engine = engine
|
|
79
|
+
|
|
80
|
+
async def __call__(self, call: EnvToolCall) -> ToolResult:
|
|
81
|
+
try:
|
|
82
|
+
args = self.call_schema.model_validate(call.args or {})
|
|
83
|
+
results = await self.engine.search_emails_action(args.model_dump())
|
|
84
|
+
return ToolResult(ok=True, payload={"search_results": results})
|
|
85
|
+
except Exception as exc: # pragma: no cover - runtime safety
|
|
86
|
+
return ToolResult(ok=False, error=str(exc))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ReadEmailTool(AbstractTool):
|
|
90
|
+
name = "read_email"
|
|
91
|
+
call_schema = ReadEmailArgs
|
|
92
|
+
|
|
93
|
+
def __init__(self, engine: EnronEngine):
|
|
94
|
+
self.engine = engine
|
|
95
|
+
|
|
96
|
+
async def __call__(self, call: EnvToolCall) -> ToolResult:
|
|
97
|
+
try:
|
|
98
|
+
args = self.call_schema.model_validate(call.args or {})
|
|
99
|
+
email = await self.engine.read_email_action(args.message_id)
|
|
100
|
+
return ToolResult(ok=True, payload={"email": email})
|
|
101
|
+
except Exception as exc: # pragma: no cover
|
|
102
|
+
return ToolResult(ok=False, error=str(exc))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class AnswerQuestionTool(AbstractTool):
|
|
106
|
+
name = "answer_question"
|
|
107
|
+
call_schema = AnswerQuestionArgs
|
|
108
|
+
|
|
109
|
+
def __init__(self, engine: EnronEngine):
|
|
110
|
+
self.engine = engine
|
|
111
|
+
|
|
112
|
+
async def __call__(self, call: EnvToolCall) -> ToolResult:
|
|
113
|
+
try:
|
|
114
|
+
args = self.call_schema.model_validate(call.args or {})
|
|
115
|
+
await self.engine.answer_question_action(args.answer)
|
|
116
|
+
return ToolResult(ok=True, payload={"status": "answer_recorded"})
|
|
117
|
+
except Exception as exc: # pragma: no cover
|
|
118
|
+
return ToolResult(ok=False, error=str(exc))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class TerminateTool(AbstractTool):
|
|
122
|
+
name = "terminate"
|
|
123
|
+
call_schema = TerminateArgs
|
|
124
|
+
|
|
125
|
+
def __init__(self, engine: EnronEngine):
|
|
126
|
+
self.engine = engine
|
|
127
|
+
|
|
128
|
+
async def __call__(self, call: EnvToolCall) -> ToolResult:
|
|
129
|
+
try:
|
|
130
|
+
await self.engine.answer_question_action("")
|
|
131
|
+
return ToolResult(ok=True, payload={"status": "terminated"})
|
|
132
|
+
except Exception as exc: # pragma: no cover
|
|
133
|
+
return ToolResult(ok=False, error=str(exc))
|
|
134
|
+
|
|
135
|
+
|
|
68
136
|
# -------- observation callable (optional for formatted observations)
|
|
69
137
|
class SynthEnronObservationCallable(GetObservationCallable):
|
|
70
138
|
async def get_observation(
|