synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1707 -186
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +16 -16
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Direct pytest for house to outside transition using emulator directly
|
|
4
|
+
This bypasses server issues and tests the core map reading functionality
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from pokemon_env.emulator import EmeraldEmulator
|
|
11
|
+
from tests.test_memory_map import format_map_data
|
|
12
|
+
|
|
13
|
+
class TestHouseToOutsideDirectTransition:
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
def emulator(self):
    """Yield an initialized emulator and stop it once the test is done.

    The ROM is looked up at ``Emerald-GBAdvance/rom.gba`` relative to the
    current working directory; the emulator is created with
    ``headless=True`` and ``sound=False``.
    """
    rom_file = Path.cwd() / "Emerald-GBAdvance" / "rom.gba"

    instance = EmeraldEmulator(str(rom_file), headless=True, sound=False)
    instance.initialize()

    yield instance

    # Everything after the yield is the fixture's teardown.
    instance.stop()
|
|
27
|
+
|
|
28
|
+
def test_house_map_baseline(self, emulator):
    """Check that the house save state yields a non-empty, valid map."""
    print("\n📍 Testing house map baseline...")

    emulator.load_state("tests/states/house.state")

    # Tiles surrounding the player, read straight from emulator memory
    tiles = emulator.memory_reader.read_map_around_player(radius=7)
    assert tiles, "House map data should not be empty"

    location = emulator.memory_reader.read_location()
    position = emulator.memory_reader.read_coordinates()

    print(f" Location: {location}")
    print(f" Position: {position}")
    row_count, col_count = len(tiles), len(tiles[0])
    print(f" Map size: {row_count}x{col_count}")

    # Structural sanity check on the tile grid
    report = self._validate_map_structure(tiles, location, "house")
    assert report['is_valid'], f"House map validation failed: {report['message']}"

    # Dump the rendered map so it shows up when running with -s
    rendered = format_map_data(tiles, f"House Baseline - {location}")
    print(f" House map:\n{rendered}")
|
|
53
|
+
|
|
54
|
+
def test_walk_and_map_transition(self, emulator):
    """Walk out of the house and verify the outside map reads correctly.

    Loads the house save state, prints the tiles around the player, then
    replays each movement pattern from a freshly loaded state.  As soon as
    a pattern leaves the house (the location no longer contains "HOUSE"),
    the outside map is checked via ``_test_outside_map`` and the test fails
    if that check fails.  If no pattern leaves the house, the final map is
    printed and the test fails.
    """
    print("\n🚶 Testing walk outside and map transition...")

    # Load house state
    emulator.load_state("tests/states/house.state")

    # Get initial state
    initial_location = emulator.memory_reader.read_location()
    initial_position = emulator.memory_reader.read_coordinates()

    print(f" Initial: {initial_location} at {initial_position}")

    # First, look at the house map to find the door
    house_map = emulator.memory_reader.read_map_around_player(radius=7)
    self._analyze_map_for_exits(house_map, initial_position)

    # Each entry: (label, [(button, number_of_presses), ...])
    movements = [
        ("DOWN", [('down', 10)]),
    ]

    for movement_name, button_sequence in movements:
        print(f"\n Trying movement pattern: {movement_name}")

        # Reload state for fresh attempt
        emulator.load_state("tests/states/house.state")

        # Execute button sequence
        for button, count in button_sequence:
            for _ in range(count):
                emulator.press_buttons([button], hold_frames=15, release_frames=15)
                time.sleep(0.1)

        # Check result
        new_location = emulator.memory_reader.read_location()
        new_position = emulator.memory_reader.read_coordinates()

        print(f" Result: {new_location} at {new_position}")

        # If we successfully exited the house, test the map
        if 'HOUSE' not in new_location.upper():
            print(f" ✅ Successfully exited house with pattern: {movement_name}")
            # pytest ignores a test function's return value, so the helper's
            # boolean verdict has to be asserted for a bad map to fail the test.
            assert self._test_outside_map(emulator, new_location, new_position), (
                f"Outside map validation failed for {new_location} at {new_position}"
            )
            return

    # If no pattern worked, show debugging info
    print(" ❌ Could not exit house with any movement pattern")
    final_map = emulator.memory_reader.read_map_around_player(radius=7)
    formatted_map = format_map_data(final_map, "Final House Map")
    print(f" Final map:\n{formatted_map}")

    pytest.fail("Could not exit house to test outside map transition")
|
|
106
|
+
|
|
107
|
+
def _test_outside_map(self, emulator, location, position):
|
|
108
|
+
"""Test the outside map after successful transition"""
|
|
109
|
+
print(f"\n🗺️ Testing outside map: {location} at {position}")
|
|
110
|
+
|
|
111
|
+
# Read outside map
|
|
112
|
+
outside_map = emulator.memory_reader.read_map_around_player(radius=7)
|
|
113
|
+
|
|
114
|
+
if not outside_map:
|
|
115
|
+
print(" ❌ Outside map is empty - this is the bug!")
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
# Validate outside map
|
|
119
|
+
validation = self._validate_map_structure(outside_map, location, "outside")
|
|
120
|
+
|
|
121
|
+
# Show outside map regardless of validation
|
|
122
|
+
formatted_map = format_map_data(outside_map, f"Outside Map - {location}")
|
|
123
|
+
print(f" Outside map:\n{formatted_map}")
|
|
124
|
+
|
|
125
|
+
if validation['is_valid']:
|
|
126
|
+
print(f" ✅ Outside map validation passed: {validation['message']}")
|
|
127
|
+
return True
|
|
128
|
+
else:
|
|
129
|
+
print(f" ❌ Outside map validation failed: {validation['message']}")
|
|
130
|
+
print(" This confirms the transition bug!")
|
|
131
|
+
return False
|
|
132
|
+
|
|
133
|
+
def _analyze_map_for_exits(self, map_data, player_pos):
|
|
134
|
+
"""Analyze house map to find potential exits"""
|
|
135
|
+
print(f" Analyzing house map for exits around player at {player_pos}...")
|
|
136
|
+
|
|
137
|
+
center_y = len(map_data) // 2
|
|
138
|
+
center_x = len(map_data[0]) // 2
|
|
139
|
+
|
|
140
|
+
# Check tiles around player for doors or exits
|
|
141
|
+
for dy in range(-1, 2):
|
|
142
|
+
for dx in range(-1, 2):
|
|
143
|
+
y = center_y + dy
|
|
144
|
+
x = center_x + dx
|
|
145
|
+
|
|
146
|
+
if 0 <= y < len(map_data) and 0 <= x < len(map_data[0]):
|
|
147
|
+
tile = map_data[y][x]
|
|
148
|
+
if len(tile) >= 4:
|
|
149
|
+
tile_id, behavior, collision, elevation = tile
|
|
150
|
+
behavior_name = behavior.name if hasattr(behavior, 'name') else f"Raw({behavior})"
|
|
151
|
+
|
|
152
|
+
if dy == 0 and dx == 0:
|
|
153
|
+
print(f" Player: {behavior_name} (collision={collision})")
|
|
154
|
+
elif "DOOR" in behavior_name:
|
|
155
|
+
print(f" Door found at ({dx:+2d},{dy:+2d}): {behavior_name}")
|
|
156
|
+
elif collision == 0:
|
|
157
|
+
print(f" Walkable at ({dx:+2d},{dy:+2d}): {behavior_name}")
|
|
158
|
+
|
|
159
|
+
def _validate_map_structure(self, map_data, location_name, area_type):
|
|
160
|
+
"""Validate map structure"""
|
|
161
|
+
if not map_data or len(map_data) == 0:
|
|
162
|
+
return {"is_valid": False, "message": "Empty map data"}
|
|
163
|
+
|
|
164
|
+
total_tiles = sum(len(row) for row in map_data)
|
|
165
|
+
unknown_tiles = 0
|
|
166
|
+
valid_tiles = 0
|
|
167
|
+
|
|
168
|
+
for row in map_data:
|
|
169
|
+
for tile in row:
|
|
170
|
+
if len(tile) >= 2:
|
|
171
|
+
behavior = tile[1]
|
|
172
|
+
if hasattr(behavior, 'name'):
|
|
173
|
+
behavior_name = behavior.name
|
|
174
|
+
elif isinstance(behavior, int):
|
|
175
|
+
try:
|
|
176
|
+
from pokemon_env.enums import MetatileBehavior
|
|
177
|
+
behavior_enum = MetatileBehavior(behavior)
|
|
178
|
+
behavior_name = behavior_enum.name
|
|
179
|
+
except ValueError:
|
|
180
|
+
behavior_name = "UNKNOWN"
|
|
181
|
+
else:
|
|
182
|
+
behavior_name = "UNKNOWN"
|
|
183
|
+
|
|
184
|
+
if behavior_name == "UNKNOWN":
|
|
185
|
+
unknown_tiles += 1
|
|
186
|
+
else:
|
|
187
|
+
valid_tiles += 1
|
|
188
|
+
|
|
189
|
+
unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0
|
|
190
|
+
|
|
191
|
+
if unknown_ratio > 0.5:
|
|
192
|
+
return {"is_valid": False, "message": f"Too many unknown tiles: {unknown_ratio:.1%}"}
|
|
193
|
+
|
|
194
|
+
return {
|
|
195
|
+
"is_valid": True,
|
|
196
|
+
"message": f"Structure valid: {valid_tiles}/{total_tiles} valid tiles ({unknown_ratio:.1%} unknown)"
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
if __name__ == "__main__":
    # Hand pytest's exit code back to the shell so a failing run is not
    # reported as success when this file is executed directly.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Pytest for the house to outside transition bug
|
|
4
|
+
This test reproduces the specific issue where transitioning from house.state
|
|
5
|
+
to outside results in incorrect map data
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
import requests
|
|
10
|
+
import time
|
|
11
|
+
import threading
|
|
12
|
+
import subprocess
|
|
13
|
+
import os
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from tests.test_memory_map import format_map_data, MetatileBehavior
|
|
17
|
+
|
|
18
|
+
# Test configuration
|
|
19
|
+
SERVER_PORT = 8002 # Use different port to avoid conflicts
|
|
20
|
+
SERVER_URL = f"http://127.0.0.1:{SERVER_PORT}"
|
|
21
|
+
|
|
22
|
+
class TestHouseToOutsideTransition:
|
|
23
|
+
|
|
24
|
+
@classmethod
def setup_class(cls):
    """Start the server with house.state and wait until it answers.

    Launches ``server.app`` as a subprocess on ``SERVER_PORT`` and polls
    ``{SERVER_URL}/status`` about once per second, for up to 30 attempts,
    until it returns HTTP 200.

    Raises:
        RuntimeError: if the server process exits before becoming ready, or
            never answers with HTTP 200.  The process is stopped first.
    """
    import sys  # local import: only needed to locate the running interpreter

    print(f"\n🚀 Starting server on port {SERVER_PORT} with house.state...")

    # Use the interpreter running the tests rather than whatever "python"
    # happens to be first on PATH.
    server_cmd = [
        sys.executable, "-m", "server.app",
        "--load-state", "tests/states/house.state",
        "--port", str(SERVER_PORT),
        "--manual"
    ]

    # The tests never read the server's output; an undrained PIPE would
    # eventually fill up and block the server, so discard it instead.
    cls.server_process = subprocess.Popen(
        server_cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        env={**os.environ, "CONDA_DEFAULT_ENV": "mgba"}
    )

    # Wait for server to start
    max_wait = 30
    for attempt in range(1, max_wait + 1):
        exit_code = cls.server_process.poll()
        if exit_code is not None:
            raise RuntimeError(f"Server exited early with code {exit_code}")

        try:
            response = requests.get(f"{SERVER_URL}/status", timeout=1)
        except requests.exceptions.RequestException:
            response = None

        if response is not None and response.status_code == 200:
            print(f"✅ Server started successfully after {attempt} seconds")
            return

        # Pause between attempts whether the request failed or was non-200
        if attempt < max_wait:
            time.sleep(1)

    # Kill process if it started but isn't responding
    cls.server_process.terminate()
    try:
        cls.server_process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        cls.server_process.kill()
        cls.server_process.wait()
    raise RuntimeError(f"Server failed to start within {max_wait} seconds")
|
|
63
|
+
|
|
64
|
+
@classmethod
def teardown_class(cls):
    """Stop the server process started by ``setup_class``, if there is one.

    Sends terminate, waits up to 10 seconds, then falls back to kill so a
    server that ignores the request cannot hang the test run.
    """
    print("\n🛑 Stopping server...")
    process = getattr(cls, 'server_process', None)
    if process is None:
        return

    process.terminate()
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()

    # Close any pipe handles that were opened for the child
    for stream in (process.stdout, process.stderr):
        if stream is not None:
            stream.close()

    print("✅ Server stopped")
|
|
72
|
+
|
|
73
|
+
def test_initial_house_map(self):
    """Check the server's initial state is the house and its map looks right.

    Fetches ``/state``, asserts the player location contains
    "BRENDANS HOUSE 1F" (case-insensitive) and that map tiles are present.
    When ``tests/states/house_map_truth.txt`` exists, the formatted map is
    compared to it with ``_maps_are_similar``; otherwise only a
    case-insensitive check for house-related text is made.
    """
    print("\n📍 Testing initial house map...")

    # Get state from server
    response = requests.get(f"{SERVER_URL}/state", timeout=5)
    assert response.status_code == 200, "Failed to get server state"

    state_data = response.json()

    # Verify location
    location = state_data.get('player', {}).get('location', '')
    assert 'BRENDANS HOUSE 1F' in location.upper(), f"Expected house location, got: {location}"

    # Get map tiles
    assert 'map' in state_data, "No map data in state"
    assert 'tiles' in state_data['map'], "No tiles in map data"

    map_tiles = state_data['map']['tiles']
    assert map_tiles, "Map tiles are empty"

    # Format map data (convert from server format to test format)
    formatted_map = self._format_server_map_data(map_tiles, f"House Map - {location}")

    # Load expected ground truth
    truth_path = Path("tests/states/house_map_truth.txt")
    if truth_path.exists():
        expected_map = truth_path.read_text(encoding="utf-8").strip()

        # Compare maps (allowing for some flexibility in coordinates/format)
        assert self._maps_are_similar(formatted_map, expected_map), \
            f"House map doesn't match expected format:\n\nActual:\n{formatted_map}\n\nExpected:\n{expected_map}"
    else:
        print(f"⚠️ Ground truth file not found at {truth_path}")
        print(f"House map format:\n{formatted_map}")
        # Don't fail if ground truth doesn't exist, just verify basic structure.
        # Compare upper-cased, like the location check above, so a mixed-case
        # location in the title cannot cause a spurious failure.
        upper_map = formatted_map.upper()
        assert "HOUSE" in upper_map or "BRENDAN" in upper_map, "Map should contain house-related content"
|
|
111
|
+
|
|
112
|
+
def test_walk_outside_transition(self):
    """Walk down out of the house and verify the outside map.

    Sends up to 10 "down" button actions, checking the reported location
    after each one.  Once the location no longer contains "HOUSE", the
    state is fetched again and must report Littleroot Town with non-empty
    map tiles that pass ``_validate_outside_map``.  The test fails if the
    house is never left or any request returns a non-200 status.
    """
    print("\n🚶 Testing transition from house to outside...")

    # First, check initial position
    response = requests.get(f"{SERVER_URL}/state", timeout=5)
    assert response.status_code == 200, "Failed to get initial server state"
    initial_pos = response.json().get('player', {}).get('position', {})
    print(f" Initial position: ({initial_pos.get('x', '?')}, {initial_pos.get('y', '?')})")

    # Walk down until we exit the house (up to 10 steps)
    max_steps = 10
    location = ''

    for step in range(1, max_steps + 1):
        print(f" Step {step}: Walking DOWN...")
        response = requests.post(f"{SERVER_URL}/action",
                                 json={"type": "button", "button": "down"},
                                 timeout=5)
        assert response.status_code == 200, f"Failed to send DOWN action on step {step}"
        time.sleep(0.5)  # Longer delay to ensure movement completes

        # Check current location after this step
        response = requests.get(f"{SERVER_URL}/state", timeout=5)
        assert response.status_code == 200, f"Failed to get server state after step {step}"
        state_data = response.json()
        location = state_data.get('player', {}).get('location', '')
        position = state_data.get('player', {}).get('position', {})

        print(f" After step {step}: {location} at ({position.get('x', '?')}, {position.get('y', '?')})")

        # Check if we've exited the house
        if 'HOUSE' not in location.upper():
            print(f" ✅ Exited house after {step} steps!")
            break
    else:
        # Loop ran out without a break: we never exited, show state for debugging
        print(f" ❌ Never exited house after {max_steps} steps. Current location: {location}")
        pytest.fail(f"Failed to exit house after {max_steps} DOWN movements")

    # Get state after transition
    response = requests.get(f"{SERVER_URL}/state", timeout=5)
    assert response.status_code == 200, "Failed to get server state after transition"

    state_data = response.json()

    # Verify we're now outside
    location = state_data.get('player', {}).get('location', '')
    assert 'LITTLEROOT TOWN' in location.upper(), f"Expected to be in Littleroot Town, got: {location}"
    assert 'HOUSE' not in location.upper(), f"Should be outside house, but got: {location}"

    # Get map tiles
    assert 'map' in state_data, "No map data in state after transition"
    assert 'tiles' in state_data['map'], "No tiles in map data after transition"

    map_tiles = state_data['map']['tiles']
    assert len(map_tiles) > 0, "Map tiles are empty after transition"

    # Validate map quality
    validation_result = self._validate_outside_map(map_tiles, location)
    assert validation_result['is_valid'], f"Outside map validation failed: {validation_result['message']}"

    # Format and display map for debugging
    formatted_map = self._format_server_map_data(map_tiles, f"Outside Map - {location}")
    print(f"\n🗺️ Outside map:\n{formatted_map}")

    print(f"✅ Map validation: {validation_result['message']}")
|
|
180
|
+
|
|
181
|
+
def _format_server_map_data(self, server_tiles, title="Map Data"):
    """Convert server tiles to the test_memory_map.py format and render them.

    Server tiles arrive as ``[tile_id, behavior_int, collision, elevation]``
    and are converted to ``(tile_id, behavior_enum, collision, elevation)``
    tuples before being handed to ``format_map_data``.

    Args:
        server_tiles: Iterable of rows; each row is an iterable of tile
            sequences as described above.
        title: Heading forwarded unchanged to ``format_map_data``.

    Returns:
        The value returned by ``format_map_data`` for the converted tiles.
    """
    formatted_tiles = []

    for row in server_tiles:
        formatted_row = []
        for tile in row:
            if len(tile) < 4:
                # Fallback for incomplete tile data
                formatted_row.append((0, None, 0, 0))
                continue

            # Only the first four fields are used; slicing keeps tiles that
            # carry extra trailing fields from failing the unpack.
            tile_id, behavior_int, collision, elevation = tile[:4]

            # Convert behavior integer to enum for compatibility
            try:
                behavior_enum = MetatileBehavior(behavior_int)
            except ValueError:
                behavior_enum = None  # Will be handled as "UNKNOWN" in format function

            formatted_row.append((tile_id, behavior_enum, collision, elevation))

        formatted_tiles.append(formatted_row)

    return format_map_data(formatted_tiles, title)
|
|
207
|
+
|
|
208
|
+
def _maps_are_similar(self, actual, expected):
|
|
209
|
+
"""Check if two maps are similar (allowing for minor differences)"""
|
|
210
|
+
# For now, just check that both contain reasonable map structure
|
|
211
|
+
# Could be made more sophisticated later
|
|
212
|
+
|
|
213
|
+
# Both should have map dimensions
|
|
214
|
+
actual_has_dimensions = "Map dimensions:" in actual
|
|
215
|
+
expected_has_dimensions = "Map dimensions:" in expected
|
|
216
|
+
|
|
217
|
+
# Both should have traversability map
|
|
218
|
+
actual_has_traversability = "TRAVERSABILITY MAP" in actual
|
|
219
|
+
expected_has_traversability = "TRAVERSABILITY MAP" in expected
|
|
220
|
+
|
|
221
|
+
# Both should have player position
|
|
222
|
+
actual_has_player = " P " in actual
|
|
223
|
+
expected_has_player = " P " in expected
|
|
224
|
+
|
|
225
|
+
return (actual_has_dimensions and expected_has_dimensions and
|
|
226
|
+
actual_has_traversability and expected_has_traversability and
|
|
227
|
+
actual_has_player and expected_has_player)
|
|
228
|
+
|
|
229
|
+
def _validate_outside_map(self, map_tiles, location_name):
|
|
230
|
+
"""Validate that outside map looks reasonable"""
|
|
231
|
+
if not map_tiles or len(map_tiles) == 0:
|
|
232
|
+
return {"is_valid": False, "message": "Empty map data"}
|
|
233
|
+
|
|
234
|
+
total_tiles = sum(len(row) for row in map_tiles)
|
|
235
|
+
unknown_tiles = 0
|
|
236
|
+
walkable_tiles = 0
|
|
237
|
+
wall_tiles = 0
|
|
238
|
+
special_tiles = 0
|
|
239
|
+
|
|
240
|
+
for row in map_tiles:
|
|
241
|
+
for tile in row:
|
|
242
|
+
if len(tile) >= 4:
|
|
243
|
+
tile_id, behavior_int, collision, elevation = tile
|
|
244
|
+
|
|
245
|
+
# Convert behavior
|
|
246
|
+
try:
|
|
247
|
+
behavior_enum = MetatileBehavior(behavior_int)
|
|
248
|
+
behavior_name = behavior_enum.name
|
|
249
|
+
except ValueError:
|
|
250
|
+
behavior_name = "UNKNOWN"
|
|
251
|
+
|
|
252
|
+
if behavior_name == "UNKNOWN":
|
|
253
|
+
unknown_tiles += 1
|
|
254
|
+
elif behavior_name == "NORMAL":
|
|
255
|
+
if collision == 0:
|
|
256
|
+
walkable_tiles += 1
|
|
257
|
+
else:
|
|
258
|
+
wall_tiles += 1
|
|
259
|
+
else:
|
|
260
|
+
special_tiles += 1
|
|
261
|
+
|
|
262
|
+
unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0
|
|
263
|
+
walkable_ratio = walkable_tiles / total_tiles if total_tiles > 0 else 0
|
|
264
|
+
wall_ratio = wall_tiles / total_tiles if total_tiles > 0 else 0
|
|
265
|
+
|
|
266
|
+
# Validation rules for outside area
|
|
267
|
+
if unknown_ratio > 0.2:
|
|
268
|
+
return {"is_valid": False, "message": f"Too many unknown tiles: {unknown_ratio:.1%}"}
|
|
269
|
+
|
|
270
|
+
if walkable_ratio < 0.15:
|
|
271
|
+
return {"is_valid": False, "message": f"Too few walkable tiles: {walkable_ratio:.1%}"}
|
|
272
|
+
|
|
273
|
+
if wall_ratio > 0.95:
|
|
274
|
+
return {"is_valid": False, "message": f"Too many walls: {wall_ratio:.1%}"}
|
|
275
|
+
|
|
276
|
+
return {
|
|
277
|
+
"is_valid": True,
|
|
278
|
+
"message": f"Map valid: {walkable_ratio:.1%} walkable, {wall_ratio:.1%} walls, {unknown_ratio:.1%} unknown"
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
if __name__ == "__main__":
    # Run the test directly. Extra command-line arguments are forwarded to
    # pytest, and pytest's exit code becomes the process exit status so a
    # failing run is visible to shells and CI.
    import sys

    sys.exit(pytest.main([__file__, "-v", "-s", *sys.argv[1:]]))
|