synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +73 -29
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +134 -0
- synth_ai/api/train/configs/sft.py +95 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +49 -43
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +86 -106
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1710 -186
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +127 -0
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test battle state formatting - separate presentation for battle vs normal gameplay.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from utils.state_formatter import format_state_for_llm
|
|
8
|
+
from pokemon_env.enums import MetatileBehavior
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_battle_mode_hides_map():
    """Check that a battle state renders battle sections and no map sections.

    The state is flagged with ``is_in_battle`` and also carries map data;
    after formatting, the battle headers must be present while the map
    headers and the map name must be absent.
    """
    lead = {'species_name': 'Pikachu', 'level': 25, 'current_hp': 50, 'max_hp': 75, 'status': 'Normal'}
    trainer = {
        'name': 'Red',
        'position': {'x': 10, 'y': 10},
        'party': [lead],
    }
    encounter = {
        'player_pokemon': {'species': 'Pikachu', 'level': 25, 'current_hp': 50, 'max_hp': 75},
        'opponent_pokemon': {'species': 'Zubat', 'level': 10, 'current_hp': 20, 'max_hp': 30},
    }
    # 3x3 grid of identical walkable tiles, built the same way as the map reader tuples.
    tile = (1, MetatileBehavior.NORMAL, 0, 0)
    surroundings = {
        'tiles': [[tile] * 3] * 3,
        'current_map': 'Route 1',
    }

    formatted = format_state_for_llm({
        'player': trainer,
        'game': {'is_in_battle': True, 'battle_info': encounter},
        'map': surroundings,
    })

    # Battle mode indicator and battle status must be rendered.
    expected_present = (
        "=== BATTLE MODE ===",
        "Currently in battle",
        "=== BATTLE STATUS ===",
        "Your Pokemon: Pikachu",
        "Opponent: Zubat",
    )
    for snippet in expected_present:
        assert snippet in formatted

    # Nothing map-related may leak into the battle presentation.
    expected_absent = (
        "LOCATION & MAP INFO",
        "TRAVERSABILITY MAP",
        "Route 1",
    )
    for snippet in expected_absent:
        assert snippet not in formatted
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_battle_mode_hides_dialogue():
    """Check that dialogue text is suppressed while a battle is in progress.

    The game section carries a ``dialog_text`` value alongside the battle
    flag; the formatted output must mention both combatants but none of the
    dialogue markers or the dialogue text itself.
    """
    game_section = {
        'is_in_battle': True,
        # Text that may be left over from before the battle started.
        'dialog_text': 'Trainer wants to battle!',
        'battle_info': {
            'player_pokemon': {'species': 'Charmander', 'level': 5, 'current_hp': 18, 'max_hp': 20},
            'opponent_pokemon': {'species': 'Rattata', 'level': 3, 'current_hp': 10, 'max_hp': 15},
        },
    }

    formatted = format_state_for_llm({'player': {'name': 'Red'}, 'game': game_section})

    # Battle info is shown.
    for combatant in ("Charmander", "Rattata"):
        assert combatant in formatted

    # Dialogue is not shown in any form.
    for marker in ("--- DIALOGUE ---", "Trainer wants to battle", "RESIDUAL TEXT"):
        assert marker not in formatted
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_normal_mode_shows_everything():
    """Check that a non-battle state renders player, map and dialogue sections.

    The state has ``is_in_battle`` set to False together with map data and
    detected dialogue; all three sections must appear and the battle mode
    header must not.
    """
    party_member = {'species_name': 'Squirtle', 'level': 10, 'current_hp': 30, 'max_hp': 35, 'status': 'Normal'}
    trainer = {
        'name': 'Red',
        'position': {'x': 10, 'y': 10},
        'facing': 'North',
        'party': [party_member],
    }
    game_section = {
        'is_in_battle': False,
        'dialog_text': 'Welcome to the Pokemon Center!',
        'dialogue_detected': {'has_dialogue': True, 'confidence': 0.9},
    }
    tile = (1, MetatileBehavior.NORMAL, 0, 0)
    surroundings = {
        'tiles': [[tile] * 3] * 3,
        'current_map': 'Pokemon Center',
    }

    formatted = format_state_for_llm({
        'player': trainer,
        'game': game_section,
        'map': surroundings,
    })

    expected_present = (
        # Normal player info
        "=== PLAYER INFO ===",
        "Position: X=10, Y=10",
        "Facing: North",
        # Map
        "LOCATION & MAP INFO",
        # Dialogue
        "--- DIALOGUE ---",
        "Welcome to the Pokemon Center",
        "Detection confidence: 90.0%",
    )
    for snippet in expected_present:
        assert snippet in formatted

    # The battle mode indicator belongs to battle states only.
    assert "=== BATTLE MODE ===" not in formatted
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_battle_party_information():
    """Check that battle output lists the whole party with status conditions.

    Uses the alternative ``in_battle`` key to flag the battle and a
    three-member party containing a fainted and a poisoned member.
    """
    roster = [
        {'species_name': 'Venusaur', 'level': 50, 'current_hp': 0, 'max_hp': 200, 'status': 'Fainted'},
        {'species_name': 'Charizard', 'level': 50, 'current_hp': 180, 'max_hp': 185, 'status': 'Normal'},
        {'species_name': 'Blastoise', 'level': 50, 'current_hp': 100, 'max_hp': 190, 'status': 'Poisoned'},
    ]
    game_section = {
        'in_battle': True,  # Alternative key
        'battle_info': {
            'player_pokemon': {'species': 'Venusaur', 'level': 50, 'current_hp': 0, 'max_hp': 200},
            'opponent_pokemon': {'species': 'Alakazam', 'level': 55, 'current_hp': 150, 'max_hp': 160},
        },
    }

    formatted = format_state_for_llm({
        'player': {'name': 'Red', 'party': roster},
        'game': game_section,
    })

    # Party status section is present.
    assert "=== PARTY STATUS ===" in formatted

    # Every party member is listed.
    for species in ("Venusaur", "Charizard", "Blastoise"):
        assert species in formatted

    # Status conditions are visible: Venusaur fainted (by label or by HP), Blastoise poisoned.
    venusaur_down = "Fainted" in formatted or "0/200" in formatted
    assert venusaur_down
    assert "Poisoned" in formatted
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_battle_mode_detection_variants():
    """Check that both ``is_in_battle`` and ``in_battle`` switch on battle mode.

    Each case builds a minimal state with one of the two flag keys plus a map
    name, then verifies the battle header is rendered and the map name is not.
    """
    # (battle flag key, player name, player pokemon species, map name that must stay hidden)
    variants = (
        ('is_in_battle', 'Red', 'Mew', 'Should not appear'),
        ('in_battle', 'Blue', 'Mewtwo', 'Also should not appear'),
    )

    for flag_key, trainer_name, species, hidden_map in variants:
        state = {
            'player': {'name': trainer_name},
            'game': {
                flag_key: True,
                'battle_info': {'player_pokemon': {'species': species}},
            },
            'map': {'current_map': hidden_map},
        }

        rendered = format_state_for_llm(state)
        assert "=== BATTLE MODE ===" in rendered
        assert hidden_map not in rendered
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_empty_battle_info():
    """Check that a battle flag without ``battle_info`` still formats cleanly.

    The game section contains only ``is_in_battle``; the output must still
    carry the battle mode header and the party status section.
    """
    # Deliberately no 'battle_info' entry in the game section.
    bare_battle_state = {
        'player': {'name': 'Red'},
        'game': {'is_in_battle': True},
    }

    formatted = format_state_for_llm(bare_battle_state)

    # Battle mode is entered, and the party section is still shown
    # even though there is no battle info to render.
    for header in ("=== BATTLE MODE ===", "=== PARTY STATUS ==="):
        assert header in formatted
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
if __name__ == "__main__":
    # Running the module directly hands this file to pytest in verbose mode.
    pytest.main([__file__, "-v"])
|
examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test dialogue detection functionality.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
import numpy as np
|
|
8
|
+
from utils.state_formatter import detect_dialogue_on_frame, format_state_for_llm
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_dialogue_detection_with_blue_box():
    """Check that a frame with a blue box in its bottom rows is flagged as dialogue.

    A black 160x240 RGB frame gets a bluish band in rows 110-159, a light
    text strip inside it and grey border rows; the detector must report
    dialogue with confidence above 0.5 and mention a blue dialogue box.
    """
    # Mock frame with dialogue box characteristics (240x160 GBA resolution).
    mock_frame = np.zeros((160, 240, 3), dtype=np.uint8)

    bluish_background = [50, 70, 150]   # (R, G, B) with blue dominant
    text_strip = [220, 220, 220]        # near-white text area
    border_grey = [100, 100, 100]

    # Paint order matters: the borders overwrite the edges of the blue band.
    mock_frame[110:160, :] = bluish_background
    mock_frame[120:130, 20:220] = text_strip
    mock_frame[110:112, :] = border_grey   # top border
    mock_frame[158:160, :] = border_grey   # bottom border

    detection = detect_dialogue_on_frame(frame_array=mock_frame)

    assert detection['has_dialogue'] == True  # noqa: E712 - equality kept as in the original check
    assert detection['confidence'] > 0.5
    reason_text = detection['reason'].lower()
    assert 'blue dialogue box' in reason_text
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_no_dialogue_detection():
    """Check that an ordinary overworld-like frame is not flagged as dialogue.

    The 160x240 RGB frame consists of two flat colour bands: a greenish top
    half and a brownish bottom half. The detector must report no dialogue
    with confidence below 0.5.
    """
    # The two band assignments below cover every row (0-79 and 80-159), so the
    # frame content is fully determined by them. Start from zeros rather than
    # random noise: the noise was always overwritten, and drawing it only
    # consumed global RNG state and made the test look non-deterministic.
    frame = np.zeros((160, 240, 3), dtype=np.uint8)
    frame[0:80, :] = [100, 150, 100]    # Greenish top (grass/trees)
    frame[80:160, :] = [150, 130, 100]  # Brownish bottom (ground)

    result = detect_dialogue_on_frame(frame_array=frame)

    assert result['has_dialogue'] == False  # noqa: E712 - equality kept as in the original check
    assert result['confidence'] < 0.5
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_dialogue_detection_grayscale():
    """Check that a 2-D (grayscale) frame is accepted and judged on contrast or borders.

    The frame has a dark band in rows 110-159 with a light strip and two
    mid-grey edge rows; the detector's reason must mention text contrast or
    borders.
    """
    gray_frame = np.zeros((160, 240), dtype=np.uint8)

    # High-contrast pattern in the dialogue area.
    gray_frame[110:160, :] = 50         # dark background
    gray_frame[120:130, 20:220] = 200   # light text area

    # Horizontal edges at the top and bottom of the band.
    for edge_rows in (slice(110, 112), slice(158, 160)):
        gray_frame[edge_rows, :] = 150

    detection = detect_dialogue_on_frame(frame_array=gray_frame)

    # Detection is expected to rest on contrast and structure.
    reason_text = detection['reason'].lower()
    assert 'text contrast' in reason_text or 'borders' in reason_text
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_dialogue_validation_in_state():
    """Check how the formatter presents dialogue text under different detection results.

    Three states share the same ``dialog_text``: one with a positive
    ``dialogue_detected`` entry, one with a negative entry, and one with no
    entry at all (the backwards-compatible shape).
    """
    spoken = 'Hello trainer! Would you like to battle?'

    def make_state(detection=None):
        """Build a minimal state; ``dialogue_detected`` is added only when given."""
        game_section = {'dialog_text': spoken}
        if detection is not None:
            game_section['dialogue_detected'] = detection
        return {
            'player': {'name': 'Red', 'position': {'x': 10, 'y': 10}},
            'game': game_section,
            'map': {},
        }

    # Dialogue text with a positive frame detection: shown as dialogue with confidence.
    rendered = format_state_for_llm(make_state({'has_dialogue': True, 'confidence': 0.8}))
    for snippet in ('DIALOGUE', 'Hello trainer', 'Detection confidence: 80.0%'):
        assert snippet in rendered

    # Dialogue text but the frame says no dialogue is visible: flagged as residual.
    rendered = format_state_for_llm(make_state({'has_dialogue': False, 'confidence': 0.1}))
    for snippet in ('RESIDUAL TEXT', 'not visible'):
        assert snippet in rendered

    # No detection info at all (backwards compatibility): dialogue is still shown.
    rendered = format_state_for_llm(make_state())
    assert 'Hello trainer' in rendered
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_dialogue_detection_edge_cases():
    """Check that the detector copes with missing and undersized frames.

    Covers ``None`` input, a 10x10 RGB frame, and a 50x240 RGB frame; the
    last two only need to return a result with the expected keys.
    """
    # No frame at all.
    missing = detect_dialogue_on_frame(frame_array=None)
    assert missing['has_dialogue'] == False  # noqa: E712 - equality kept as in the original check
    assert 'No frame data' in missing['reason']

    # Wrong shape: should still work, though dialogue is unlikely to be detected.
    undersized = detect_dialogue_on_frame(frame_array=np.zeros((10, 10, 3), dtype=np.uint8))
    for key in ('has_dialogue', 'confidence'):
        assert key in undersized

    # Very small dialogue region.
    short = detect_dialogue_on_frame(frame_array=np.zeros((50, 240, 3), dtype=np.uint8))
    assert 'has_dialogue' in short
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
if __name__ == "__main__":
    # Running the module directly hands this file to pytest in verbose mode.
    pytest.main([__file__, "-v"])
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Comprehensive pytest for dialogue detection system across all states
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import io
|
|
10
|
+
import subprocess
|
|
11
|
+
import time
|
|
12
|
+
import requests
|
|
13
|
+
import json
|
|
14
|
+
import base64
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from PIL import Image
|
|
17
|
+
|
|
18
|
+
# Add parent directory to path for imports
|
|
19
|
+
sys.path.append(str(Path(__file__).parent.parent))
|
|
20
|
+
|
|
21
|
+
from utils.ocr_dialogue import create_ocr_detector
|
|
22
|
+
|
|
23
|
+
class TestDialogueDetection:
|
|
24
|
+
"""Test dialogue detection accuracy across all provided states"""
|
|
25
|
+
|
|
26
|
+
@pytest.fixture(autouse=True)
def setup(self):
    """Prepare each test: create the OCR detector and clear stale agent processes.

    Stores the detector and the agent port (8000) on the test instance,
    fails the test if no detector could be created, then runs ``pkill``
    against ``agent_direct.py`` and waits one second.
    """
    ocr_detector = create_ocr_detector()
    self.detector = ocr_detector
    self.agent_port = 8000
    assert self.detector is not None, "Could not create OCR detector"

    # Kill any agent_direct process left over from an earlier run before starting.
    kill_command = ["pkill", "-f", "agent_direct.py"]
    subprocess.run(kill_command, capture_output=True)
    time.sleep(1)
|
|
36
|
+
|
|
37
|
+
def teardown_method(self):
    """Clean up after each test by killing any ``agent_direct.py`` process.

    Runs ``pkill`` with its output captured, then pauses for half a second.
    """
    kill_command = ["pkill", "-f", "agent_direct.py"]
    subprocess.run(kill_command, capture_output=True)
    time.sleep(0.5)
|
|
41
|
+
|
|
42
|
+
def _test_state_file(self, state_file, expected_dialogue, description=""):
|
|
43
|
+
"""Helper to test a single state file"""
|
|
44
|
+
print(f"\n🧪 Testing: {state_file}")
|
|
45
|
+
print(f" Expected dialogue: {expected_dialogue}")
|
|
46
|
+
print(f" Description: {description}")
|
|
47
|
+
|
|
48
|
+
# Start agent_direct with this state
|
|
49
|
+
cmd = [
|
|
50
|
+
"/home/milkkarten/anaconda3/envs/mgba/bin/python",
|
|
51
|
+
"agent_direct.py",
|
|
52
|
+
"--load-state", state_file,
|
|
53
|
+
"--backend", "gemini",
|
|
54
|
+
"--manual"
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
# Start agent_direct
|
|
58
|
+
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
# Wait for startup
|
|
62
|
+
time.sleep(3)
|
|
63
|
+
|
|
64
|
+
# Test server responsiveness
|
|
65
|
+
for attempt in range(5):
|
|
66
|
+
try:
|
|
67
|
+
response = requests.get(f"http://localhost:{self.agent_port}/status", timeout=2)
|
|
68
|
+
if response.status_code == 200:
|
|
69
|
+
break
|
|
70
|
+
time.sleep(1)
|
|
71
|
+
except:
|
|
72
|
+
time.sleep(1)
|
|
73
|
+
else:
|
|
74
|
+
pytest.fail(f"Agent_direct failed to start for {state_file}")
|
|
75
|
+
|
|
76
|
+
# Get screenshot
|
|
77
|
+
frame_response = requests.get(f"http://localhost:{self.agent_port}/api/frame", timeout=5)
|
|
78
|
+
assert frame_response.status_code == 200, f"Failed to get screenshot for {state_file}"
|
|
79
|
+
|
|
80
|
+
frame_data = frame_response.json()
|
|
81
|
+
assert frame_data.get('frame'), f"No frame data for {state_file}"
|
|
82
|
+
|
|
83
|
+
# Decode screenshot
|
|
84
|
+
image_data = base64.b64decode(frame_data['frame'])
|
|
85
|
+
screenshot = Image.open(io.BytesIO(image_data))
|
|
86
|
+
|
|
87
|
+
# Test dialogue detection
|
|
88
|
+
box_detected = self.detector.is_dialogue_box_visible(screenshot)
|
|
89
|
+
ocr_text = self.detector.detect_dialogue_from_screenshot(screenshot)
|
|
90
|
+
|
|
91
|
+
print(f" 📦 Box detected: {box_detected}")
|
|
92
|
+
print(f" 👁️ OCR text: '{ocr_text}'")
|
|
93
|
+
|
|
94
|
+
# Get memory reading for comparison
|
|
95
|
+
try:
|
|
96
|
+
state_response = requests.get(f"http://localhost:{self.agent_port}/state", timeout=3)
|
|
97
|
+
if state_response.status_code == 200:
|
|
98
|
+
state_data = state_response.json()
|
|
99
|
+
memory_text = state_data.get('game', {}).get('dialog_text', None)
|
|
100
|
+
print(f" 💾 Memory text: '{memory_text}'")
|
|
101
|
+
else:
|
|
102
|
+
memory_text = "N/A"
|
|
103
|
+
except:
|
|
104
|
+
memory_text = "N/A"
|
|
105
|
+
|
|
106
|
+
# Verify detection accuracy
|
|
107
|
+
assert box_detected == expected_dialogue, (
|
|
108
|
+
f"Detection mismatch for {state_file}: expected {expected_dialogue}, got {box_detected}"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
return {
|
|
112
|
+
'state_file': state_file,
|
|
113
|
+
'expected_dialogue': expected_dialogue,
|
|
114
|
+
'box_detected': box_detected,
|
|
115
|
+
'ocr_text': ocr_text,
|
|
116
|
+
'memory_text': memory_text,
|
|
117
|
+
'description': description
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
finally:
|
|
121
|
+
process.terminate()
|
|
122
|
+
time.sleep(1)
|
|
123
|
+
|
|
124
|
+
def test_coordinate_tightness(self):
|
|
125
|
+
"""Test that OCR coordinates are properly tight around text area"""
|
|
126
|
+
dialogue_coords = self.detector.DIALOGUE_BOX_COORDS
|
|
127
|
+
ocr_coords = self.detector.OCR_TEXT_COORDS
|
|
128
|
+
|
|
129
|
+
# Calculate margins
|
|
130
|
+
left_margin = ocr_coords['x'] - dialogue_coords['x']
|
|
131
|
+
top_margin = ocr_coords['y'] - dialogue_coords['y']
|
|
132
|
+
right_margin = (dialogue_coords['x'] + dialogue_coords['width']) - (ocr_coords['x'] + ocr_coords['width'])
|
|
133
|
+
bottom_margin = (dialogue_coords['y'] + dialogue_coords['height']) - (ocr_coords['y'] + ocr_coords['height'])
|
|
134
|
+
|
|
135
|
+
print(f"📏 Margins - Left: {left_margin}px, Top: {top_margin}px, Right: {right_margin}px, Bottom: {bottom_margin}px")
|
|
136
|
+
|
|
137
|
+
# Verify margins are reasonable (4-16 pixels to avoid borders but not cut text)
|
|
138
|
+
assert 4 <= left_margin <= 16, f"Left margin {left_margin}px outside acceptable range (4-16px)"
|
|
139
|
+
assert 4 <= top_margin <= 16, f"Top margin {top_margin}px outside acceptable range (4-16px)"
|
|
140
|
+
assert 4 <= right_margin <= 16, f"Right margin {right_margin}px outside acceptable range (4-16px)"
|
|
141
|
+
assert 4 <= bottom_margin <= 16, f"Bottom margin {bottom_margin}px outside acceptable range (4-16px)"
|
|
142
|
+
|
|
143
|
+
def test_no_dialog_states(self):
|
|
144
|
+
"""Test states that should NOT have dialogue"""
|
|
145
|
+
no_dialog_states = [
|
|
146
|
+
("tests/states/no_dialog1.state", "No dialogue state 1"),
|
|
147
|
+
("tests/states/no_dialog2.state", "No dialogue state 2"),
|
|
148
|
+
("tests/states/no_dialog3.state", "No dialogue state 3"),
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
for state_file, description in no_dialog_states:
|
|
152
|
+
if os.path.exists(state_file):
|
|
153
|
+
result = self._test_state_file(state_file, False, description)
|
|
154
|
+
assert result['box_detected'] == False, f"False positive detected in {state_file}"
|
|
155
|
+
else:
|
|
156
|
+
pytest.skip(f"State file not found: {state_file}")
|
|
157
|
+
|
|
158
|
+
def test_dialog_states(self):
|
|
159
|
+
"""Test states that SHOULD have dialogue"""
|
|
160
|
+
dialog_states = [
|
|
161
|
+
("tests/states/dialog.state", "Original dialogue state"),
|
|
162
|
+
("tests/states/dialog2.state", "Second dialogue state"),
|
|
163
|
+
("tests/states/dialog3.state", "New dialogue state 3"),
|
|
164
|
+
]
|
|
165
|
+
|
|
166
|
+
for state_file, description in dialog_states:
|
|
167
|
+
if os.path.exists(state_file):
|
|
168
|
+
result = self._test_state_file(state_file, True, description)
|
|
169
|
+
assert result['box_detected'] == True, f"Failed to detect dialogue in {state_file}"
|
|
170
|
+
else:
|
|
171
|
+
pytest.skip(f"State file not found: {state_file}")
|
|
172
|
+
|
|
173
|
+
def test_static_image_detection(self):
|
|
174
|
+
"""Test detection on static images"""
|
|
175
|
+
# Test known dialogue frame
|
|
176
|
+
if os.path.exists("dialog_frame.png"):
|
|
177
|
+
image = Image.open("dialog_frame.png")
|
|
178
|
+
box_detected = self.detector.is_dialogue_box_visible(image)
|
|
179
|
+
assert box_detected == True, "Failed to detect dialogue in known dialogue frame"
|
|
180
|
+
|
|
181
|
+
# Test emerald.png (should be no dialogue)
|
|
182
|
+
if os.path.exists("emerald.png"):
|
|
183
|
+
image = Image.open("emerald.png")
|
|
184
|
+
box_detected = self.detector.is_dialogue_box_visible(image)
|
|
185
|
+
assert box_detected == False, "False positive detected in emerald.png"
|
|
186
|
+
|
|
187
|
+
def test_ocr_preprocessing_quality(self):
|
|
188
|
+
"""Test that OCR preprocessing produces high-quality black/white output"""
|
|
189
|
+
if os.path.exists("dialog_frame.png"):
|
|
190
|
+
image = Image.open("dialog_frame.png")
|
|
191
|
+
image_np = np.array(image)
|
|
192
|
+
|
|
193
|
+
# Extract OCR region
|
|
194
|
+
ocr_coords = self.detector.OCR_TEXT_COORDS
|
|
195
|
+
ocr_region = image_np[
|
|
196
|
+
ocr_coords['y']:ocr_coords['y'] + ocr_coords['height'],
|
|
197
|
+
ocr_coords['x']:ocr_coords['x'] + ocr_coords['width']
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
# Test preprocessing
|
|
201
|
+
processed = self.detector._preprocess_for_ocr(ocr_region)
|
|
202
|
+
|
|
203
|
+
# Verify it's binary (only 0 and 255 values)
|
|
204
|
+
unique_values = np.unique(processed)
|
|
205
|
+
assert len(unique_values) <= 2, f"Processed image should be binary, found {len(unique_values)} unique values"
|
|
206
|
+
|
|
207
|
+
# Should have both black and white pixels (text and background)
|
|
208
|
+
if len(unique_values) == 2:
|
|
209
|
+
assert 0 in unique_values and 255 in unique_values, "Should have pure black (0) and white (255) pixels"
|
|
210
|
+
|
|
211
|
+
class TestDialogueIntegration:
    """Test integration with LLM agent comprehensive state"""

    def test_comprehensive_state_includes_dialog(self):
        """Check that an OCR detector can be created and exposes the expected methods.

        No agent_direct process is started; only the detector factory and
        the presence of its key attributes are verified.
        """
        ocr = create_ocr_detector()
        assert ocr is not None, "OCR detector should be available for comprehensive state"

        # Attribute name -> assertion message used when it is missing.
        required_methods = (
            ('is_dialogue_box_visible', "Detector should have dialogue box detection"),
            ('detect_dialogue_from_screenshot', "Detector should have text detection"),
            ('read_dialog_with_ocr_fallback', "Detector should have smart fallback logic"),
        )
        for method_name, message in required_methods:
            assert hasattr(ocr, method_name), message
|
|
225
|
+
|
|
226
|
+
if __name__ == "__main__":
    # Script entry point for debugging: verbose output, stdout not captured.
    # NOTE(review): this import binds `np` only in the `__main__` module;
    # pytest presumably imports this file again under its own module name,
    # so the tests would not see it - confirm.
    import numpy as np

    debug_args = [__file__, "-v", "-s"]
    pytest.main(debug_args)
|