synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (229) hide show
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,200 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Direct pytest for house to outside transition using emulator directly
4
+ This bypasses server issues and tests the core map reading functionality
5
+ """
6
+
7
+ import pytest
8
+ import time
9
+ from pathlib import Path
10
+ from pokemon_env.emulator import EmeraldEmulator
11
+ from tests.test_memory_map import format_map_data
12
+
13
class TestHouseToOutsideDirectTransition:
    """Check map reads before and after leaving the house, using the emulator directly.

    Each test loads ``tests/states/house.state`` into the emulator supplied
    by the ``emulator`` fixture and inspects what ``emulator.memory_reader``
    returns; no HTTP server is involved.
    """

    @pytest.fixture
    def emulator(self):
        """Yield an initialized headless, silent emulator and stop it afterwards.

        The ROM path is resolved relative to the current working directory
        (``Emerald-GBAdvance/rom.gba``).
        """
        rom_path = str(Path.cwd() / "Emerald-GBAdvance" / "rom.gba")

        emu = EmeraldEmulator(rom_path, headless=True, sound=False)
        emu.initialize()

        yield emu

        emu.stop()

    def test_house_map_baseline(self, emulator):
        """Test that house map reads correctly as baseline"""
        print("\n📍 Testing house map baseline...")

        # Load house state
        emulator.load_state("tests/states/house.state")

        # Read initial map
        map_data = emulator.memory_reader.read_map_around_player(radius=7)
        assert map_data, "House map data should not be empty"

        location = emulator.memory_reader.read_location()
        position = emulator.memory_reader.read_coordinates()

        print(f" Location: {location}")
        print(f" Position: {position}")
        print(f" Map size: {len(map_data)}x{len(map_data[0])}")

        # Validate house map
        validation = self._validate_map_structure(map_data, location, "house")
        assert validation['is_valid'], f"House map validation failed: {validation['message']}"

        # Show house map
        formatted_map = format_map_data(map_data, f"House Baseline - {location}")
        print(f" House map:\n{formatted_map}")

    def test_walk_and_map_transition(self, emulator):
        """Walk out of the house and require a valid outside map.

        Fails when no movement pattern leaves the house, and also when the
        map read after leaving is empty or does not pass
        ``_validate_map_structure``.
        """
        print("\n🚶 Testing walk outside and map transition...")

        # Load house state
        emulator.load_state("tests/states/house.state")

        # Get initial state
        initial_location = emulator.memory_reader.read_location()
        initial_position = emulator.memory_reader.read_coordinates()

        print(f" Initial: {initial_location} at {initial_position}")

        # First, look at the house map to find the door
        house_map = emulator.memory_reader.read_map_around_player(radius=7)
        self._analyze_map_for_exits(house_map, initial_position)

        # Movement patterns to try: (label, [(button, press count), ...])
        movements = [
            ("DOWN", [('down', 10)]),
        ]

        for movement_name, button_sequence in movements:
            print(f"\n Trying movement pattern: {movement_name}")

            # Reload state for fresh attempt
            emulator.load_state("tests/states/house.state")

            # Execute button sequence
            for button, count in button_sequence:
                for _ in range(count):
                    emulator.press_buttons([button], hold_frames=15, release_frames=15)
                    time.sleep(0.1)

            # Check result
            new_location = emulator.memory_reader.read_location()
            new_position = emulator.memory_reader.read_coordinates()

            print(f" Result: {new_location} at {new_position}")

            # If we successfully exited the house, test the map
            if 'HOUSE' not in new_location.upper():
                print(f" ✅ Successfully exited house with pattern: {movement_name}")
                # Assert on the helper's verdict: returning it from the test
                # would let a broken outside map pass silently.
                assert self._test_outside_map(emulator, new_location, new_position), (
                    f"Outside map is empty or invalid after leaving the house "
                    f"({new_location} at {new_position})"
                )
                return

        # If no pattern worked, show debugging info
        print(" ❌ Could not exit house with any movement pattern")
        final_map = emulator.memory_reader.read_map_around_player(radius=7)
        formatted_map = format_map_data(final_map, "Final House Map")
        print(f" Final map:\n{formatted_map}")

        pytest.fail("Could not exit house to test outside map transition")

    def _test_outside_map(self, emulator, location, position):
        """Read, print and validate the map after leaving the house.

        Returns:
            True when the map is non-empty and passes
            ``_validate_map_structure``; False otherwise.
        """
        print(f"\n🗺️ Testing outside map: {location} at {position}")

        # Read outside map
        outside_map = emulator.memory_reader.read_map_around_player(radius=7)

        if not outside_map:
            print(" ❌ Outside map is empty - this is the bug!")
            return False

        # Validate outside map
        validation = self._validate_map_structure(outside_map, location, "outside")

        # Show outside map regardless of validation
        formatted_map = format_map_data(outside_map, f"Outside Map - {location}")
        print(f" Outside map:\n{formatted_map}")

        if validation['is_valid']:
            print(f" ✅ Outside map validation passed: {validation['message']}")
            return True

        print(f" ❌ Outside map validation failed: {validation['message']}")
        print(" This confirms the transition bug!")
        return False

    def _analyze_map_for_exits(self, map_data, player_pos):
        """Print the tiles in the 3x3 neighbourhood of the map centre.

        The centre tile of ``map_data`` is reported as the player tile;
        neighbours are reported when their behavior name contains "DOOR" or
        their collision value is 0. ``player_pos`` is only used in the
        printed header.
        """
        print(f" Analyzing house map for exits around player at {player_pos}...")

        # Nothing to index into: avoid an IndexError on map_data[0].
        if not map_data or not map_data[0]:
            return

        center_y = len(map_data) // 2
        center_x = len(map_data[0]) // 2

        # Check tiles around player for doors or exits
        for dy in range(-1, 2):
            for dx in range(-1, 2):
                y = center_y + dy
                x = center_x + dx

                if not (0 <= y < len(map_data) and 0 <= x < len(map_data[0])):
                    continue

                tile = map_data[y][x]
                if len(tile) < 4:
                    continue

                # Slice so tiles carrying extra trailing fields still unpack.
                _tile_id, behavior, collision, _elevation = tile[:4]
                behavior_name = behavior.name if hasattr(behavior, 'name') else f"Raw({behavior})"

                if dy == 0 and dx == 0:
                    print(f" Player: {behavior_name} (collision={collision})")
                elif "DOOR" in behavior_name:
                    print(f" Door found at ({dx:+2d},{dy:+2d}): {behavior_name}")
                elif collision == 0:
                    print(f" Walkable at ({dx:+2d},{dy:+2d}): {behavior_name}")

    def _validate_map_structure(self, map_data, location_name, area_type):
        """Classify tile behaviors and reject maps that are mostly unknown.

        A tile's behavior (its second element) counts as valid when it has a
        ``name`` attribute or is an int accepted by ``MetatileBehavior``;
        anything else counts as unknown. Tiles with fewer than two elements
        are included in the total but in neither count. ``location_name``
        and ``area_type`` are accepted but not used.

        Returns:
            dict with ``is_valid`` (False for an empty map or when more than
            half of all tiles are unknown) and a human-readable ``message``.
        """
        if not map_data:
            return {"is_valid": False, "message": "Empty map data"}

        total_tiles = sum(len(row) for row in map_data)
        unknown_tiles = 0
        valid_tiles = 0

        for row in map_data:
            for tile in row:
                if len(tile) < 2:
                    continue

                behavior = tile[1]
                if hasattr(behavior, 'name'):
                    behavior_name = behavior.name
                elif isinstance(behavior, int):
                    try:
                        # Imported lazily: only raw integer behaviors need the enum.
                        from pokemon_env.enums import MetatileBehavior
                        behavior_name = MetatileBehavior(behavior).name
                    except ValueError:
                        behavior_name = "UNKNOWN"
                else:
                    behavior_name = "UNKNOWN"

                if behavior_name == "UNKNOWN":
                    unknown_tiles += 1
                else:
                    valid_tiles += 1

        unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0

        if unknown_ratio > 0.5:
            return {"is_valid": False, "message": f"Too many unknown tiles: {unknown_ratio:.1%}"}

        return {
            "is_valid": True,
            "message": f"Structure valid: {valid_tiles}/{total_tiles} valid tiles ({unknown_ratio:.1%} unknown)"
        }
198
+
199
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with -v and -s.
    direct_run_args = [__file__, "-v", "-s"]
    pytest.main(direct_run_args)
@@ -0,0 +1,284 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pytest for the house to outside transition bug
4
+ This test reproduces the specific issue where transitioning from house.state
5
+ to outside results in incorrect map data
6
+ """
7
+
8
+ import pytest
9
+ import requests
10
+ import time
11
+ import threading
12
+ import subprocess
13
+ import os
14
+ from pathlib import Path
15
+
16
+ from tests.test_memory_map import format_map_data, MetatileBehavior
17
+
18
# Test configuration
# The port differs from other server instances to avoid conflicts.
SERVER_PORT = 8002
# Base URL every request in this module is sent to.
SERVER_URL = "http://127.0.0.1:" + str(SERVER_PORT)
21
+
22
class TestHouseToOutsideTransition:
    """Reproduce the house -> outside map transition through the HTTP server.

    ``setup_class`` launches ``server.app`` as a subprocess loaded with
    ``tests/states/house.state``; the tests then talk to it at ``SERVER_URL``
    and ``teardown_class`` stops it again.
    """

    @classmethod
    def setup_class(cls):
        """Start server with house.state before running tests

        Raises:
            Exception: if the server process exits early or ``/status`` does
                not answer with HTTP 200 within ``max_wait`` attempts.
        """
        print(f"\n🚀 Starting server on port {SERVER_PORT} with house.state...")

        # Start server in background
        server_cmd = [
            "python", "-m", "server.app",
            "--load-state", "tests/states/house.state",
            "--port", str(SERVER_PORT),
            "--manual"
        ]

        # Nothing ever reads the child's output, so it is discarded rather
        # than piped: an unread pipe can fill up and block the server.
        cls.server_process = subprocess.Popen(
            server_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env={**os.environ, "CONDA_DEFAULT_ENV": "mgba"}
        )

        # Wait for server to start
        max_wait = 30
        for i in range(max_wait):
            # A server that already exited will never become ready.
            if cls.server_process.poll() is not None:
                break

            try:
                response = requests.get(f"{SERVER_URL}/status", timeout=1)
            except requests.exceptions.RequestException:
                response = None

            if response is not None and response.status_code == 200:
                print(f"✅ Server started successfully after {i+1} seconds")
                return

            # Pause between attempts for refused connections and non-200 replies alike.
            if i < max_wait - 1:
                time.sleep(1)

        exit_code = cls.server_process.poll()
        cls._stop_server()
        if exit_code is not None:
            raise Exception(f"Server exited with code {exit_code} before it became ready")
        raise Exception(f"Server failed to start within {max_wait} seconds")

    @classmethod
    def _stop_server(cls):
        """Terminate the server process, killing it if it does not exit in time."""
        process = getattr(cls, 'server_process', None)
        if process is None:
            return

        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

    @classmethod
    def teardown_class(cls):
        """Stop server after all tests"""
        print("\n🛑 Stopping server...")
        if hasattr(cls, 'server_process'):
            cls._stop_server()
            print("✅ Server stopped")

    def test_initial_house_map(self):
        """Test that the initial house map matches the expected ground truth"""
        print("\n📍 Testing initial house map...")

        # Get state from server
        response = requests.get(f"{SERVER_URL}/state", timeout=5)
        assert response.status_code == 200, "Failed to get server state"

        state_data = response.json()

        # Verify location
        location = state_data.get('player', {}).get('location', '')
        assert 'BRENDANS HOUSE 1F' in location.upper(), f"Expected house location, got: {location}"

        # Get map tiles
        assert 'map' in state_data, "No map data in state"
        assert 'tiles' in state_data['map'], "No tiles in map data"

        map_tiles = state_data['map']['tiles']
        assert len(map_tiles) > 0, "Map tiles are empty"

        # Format map data (convert from server format to test format)
        formatted_map = self._format_server_map_data(map_tiles, f"House Map - {location}")

        # Load expected ground truth
        truth_path = Path("tests/states/house_map_truth.txt")
        if truth_path.exists():
            expected_map = truth_path.read_text().strip()

            # Compare maps (allowing for some flexibility in coordinates/format)
            assert self._maps_are_similar(formatted_map, expected_map), \
                f"House map doesn't match expected format:\n\nActual:\n{formatted_map}\n\nExpected:\n{expected_map}"
        else:
            print(f"⚠️ Ground truth file not found at {truth_path}")
            print(f"House map format:\n{formatted_map}")
            # Don't fail if ground truth doesn't exist, just verify basic structure
            assert "HOUSE" in formatted_map or "BRENDAN" in formatted_map, "Map should contain house-related content"

    def test_walk_outside_transition(self):
        """Test walking outside from house and verify map is correct"""
        print("\n🚶 Testing transition from house to outside...")

        # First, check initial position
        response = requests.get(f"{SERVER_URL}/state", timeout=5)
        assert response.status_code == 200, "Failed to get initial server state"
        initial_state = response.json()
        initial_pos = initial_state.get('player', {}).get('position', {})
        print(f" Initial position: ({initial_pos.get('x', '?')}, {initial_pos.get('y', '?')})")

        # Walk down until we exit the house (up to 10 steps)
        max_steps = 10
        location = ''

        for step in range(1, max_steps + 1):
            print(f" Step {step}: Walking DOWN...")
            response = requests.post(f"{SERVER_URL}/action",
                                     json={"type": "button", "button": "down"},
                                     timeout=5)
            assert response.status_code == 200, f"Failed to send DOWN action on step {step}"
            time.sleep(0.5)  # Longer delay to ensure movement completes

            # Check current location after this step
            response = requests.get(f"{SERVER_URL}/state", timeout=5)
            assert response.status_code == 200, f"Failed to get server state on step {step}"
            state_data = response.json()
            location = state_data.get('player', {}).get('location', '')
            position = state_data.get('player', {}).get('position', {})

            print(f" After step {step}: {location} at ({position.get('x', '?')}, {position.get('y', '?')})")

            # Check if we've exited the house
            if 'HOUSE' not in location.upper():
                print(f" ✅ Exited house after {step} steps!")
                break
        else:
            # If we never exited, show current state for debugging
            print(f" ❌ Never exited house after {max_steps} steps. Current location: {location}")
            # pytest.fail instead of `assert False`, which `python -O` strips.
            pytest.fail(f"Failed to exit house after {max_steps} DOWN movements")

        # Get state after transition
        response = requests.get(f"{SERVER_URL}/state", timeout=5)
        assert response.status_code == 200, "Failed to get server state after transition"

        state_data = response.json()

        # Verify we're now outside
        location = state_data.get('player', {}).get('location', '')
        assert 'LITTLEROOT TOWN' in location.upper(), f"Expected to be in Littleroot Town, got: {location}"
        assert 'HOUSE' not in location.upper(), f"Should be outside house, but got: {location}"

        # Get map tiles
        assert 'map' in state_data, "No map data in state after transition"
        assert 'tiles' in state_data['map'], "No tiles in map data after transition"

        map_tiles = state_data['map']['tiles']
        assert len(map_tiles) > 0, "Map tiles are empty after transition"

        # Validate map quality
        validation_result = self._validate_outside_map(map_tiles, location)
        assert validation_result['is_valid'], f"Outside map validation failed: {validation_result['message']}"

        # Format and display map for debugging
        formatted_map = self._format_server_map_data(map_tiles, f"Outside Map - {location}")
        print(f"\n🗺️ Outside map:\n{formatted_map}")

        print(f"✅ Map validation: {validation_result['message']}")

    def _format_server_map_data(self, server_tiles, title="Map Data"):
        """Convert server tile format to the same format as test_memory_map.py

        Server tiles are ``[tile_id, behavior_int, collision, elevation]``;
        each becomes ``(tile_id, behavior_enum, collision, elevation)`` with
        ``behavior_enum`` set to None when the integer is not a
        ``MetatileBehavior`` value. Tiles with fewer than four fields become
        ``(0, None, 0, 0)``. The converted grid is passed to
        ``format_map_data`` together with ``title``.
        """
        formatted_tiles = []

        for row in server_tiles:
            formatted_row = []
            for tile in row:
                if len(tile) < 4:
                    # Fallback for incomplete tile data
                    formatted_row.append((0, None, 0, 0))
                    continue

                # Slice so tiles carrying extra trailing fields still unpack.
                tile_id, behavior_int, collision, elevation = tile[:4]

                # Convert behavior integer to enum for compatibility
                try:
                    behavior_enum = MetatileBehavior(behavior_int)
                except ValueError:
                    behavior_enum = None  # Will be handled as "UNKNOWN" in format function

                formatted_row.append((tile_id, behavior_enum, collision, elevation))

            formatted_tiles.append(formatted_row)

        return format_map_data(formatted_tiles, title)

    def _maps_are_similar(self, actual, expected):
        """Return True when both formatted maps contain the same structural markers.

        This is a coarse check: each text must contain "Map dimensions:",
        "TRAVERSABILITY MAP" and a player marker " P ". The tile contents
        themselves are not compared.
        """
        required_markers = ("Map dimensions:", "TRAVERSABILITY MAP", " P ")
        return all(
            marker in actual and marker in expected
            for marker in required_markers
        )

    def _validate_outside_map(self, map_tiles, location_name):
        """Validate that outside map looks reasonable

        Tiles whose behavior integer is not a ``MetatileBehavior`` value count
        as unknown; tiles named "NORMAL" count as walkable when their
        collision is 0 and as wall otherwise. Tiles with fewer than four
        fields are included in the total only. ``location_name`` is accepted
        but not used.

        Returns:
            dict with ``is_valid`` and ``message``. The map is rejected when
            it is empty, more than 20% unknown, less than 15% walkable, or
            more than 95% wall.
        """
        if not map_tiles:
            return {"is_valid": False, "message": "Empty map data"}

        total_tiles = sum(len(row) for row in map_tiles)
        unknown_tiles = 0
        walkable_tiles = 0
        wall_tiles = 0

        for row in map_tiles:
            for tile in row:
                if len(tile) < 4:
                    continue

                # Slice so tiles carrying extra trailing fields still unpack.
                _tile_id, behavior_int, collision, _elevation = tile[:4]

                # Convert behavior
                try:
                    behavior_name = MetatileBehavior(behavior_int).name
                except ValueError:
                    behavior_name = "UNKNOWN"

                if behavior_name == "UNKNOWN":
                    unknown_tiles += 1
                elif behavior_name == "NORMAL":
                    if collision == 0:
                        walkable_tiles += 1
                    else:
                        wall_tiles += 1

        unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0
        walkable_ratio = walkable_tiles / total_tiles if total_tiles > 0 else 0
        wall_ratio = wall_tiles / total_tiles if total_tiles > 0 else 0

        # Validation rules for outside area
        if unknown_ratio > 0.2:
            return {"is_valid": False, "message": f"Too many unknown tiles: {unknown_ratio:.1%}"}

        if walkable_ratio < 0.15:
            return {"is_valid": False, "message": f"Too few walkable tiles: {walkable_ratio:.1%}"}

        if wall_ratio > 0.95:
            return {"is_valid": False, "message": f"Too many walls: {wall_ratio:.1%}"}

        return {
            "is_valid": True,
            "message": f"Map valid: {walkable_ratio:.1%} walkable, {wall_ratio:.1%} walls, {unknown_ratio:.1%} unknown"
        }
280
+
281
if __name__ == "__main__":
    # Run the test directly, forwarding any extra command-line arguments to pytest.
    import sys

    direct_run_args = [__file__, "-v", "-s", *sys.argv[1:]]
    pytest.main(direct_run_args)