synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (229) hide show
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,468 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pytest to create output maps and compare to ground truth for both direct and server
4
+ """
5
+
6
+ import pytest
7
+ import os
8
+ import time
9
+ import subprocess
10
+ import requests
11
+ import tempfile
12
+ from pathlib import Path
13
+ from pokemon_env.emulator import EmeraldEmulator
14
+
15
+
16
+ class TestMapGroundTruthComparison:
17
+ """Test suite for comparing map outputs to ground truth"""
18
+
19
@pytest.fixture(scope="class")
def output_dir(self):
    """Return the directory where generated map files are written.

    The directory (and any missing parents) is created on first use.
    """
    maps_dir = Path("test_outputs/pytest_maps")
    maps_dir.mkdir(parents=True, exist_ok=True)
    return maps_dir
25
+
26
@pytest.fixture(scope="class")
def ground_truth_dir(self):
    """Return the directory that holds the ground-truth map files."""
    reference_dir = Path("tests/ground_truth")
    return reference_dir
30
+
31
def format_map_for_comparison(self, tiles, title, location, position):
    """Render a tile grid in the text layout used for ground-truth comparison.

    Args:
        tiles: Rows of tiles. The first four items of each tile are read as
            (tile_id, behavior, collision, elevation); ``behavior`` may be a
            plain value or an object exposing ``.value``. Tiles with fewer
            than four items are drawn as ``?``.
        title: Text placed inside the ``=== ... ===`` banner.
        location: Accepted for call-site symmetry; not used in the rendering.
        position: Player position. When it is truthy and has at least two
            components, the tile at row 7, column 7 is drawn as ``P``.

    Returns:
        The formatted map as one newline-joined string.
    """
    if not tiles:
        return f"=== {title} ===\nNo tiles available\n"

    # Behavior value -> symbol. Behavior 0 (NORMAL) is handled separately
    # because its symbol depends on the collision flag.
    behavior_symbols = {
        1: "#",     # SECRET_BASE_WALL
        51: "IM",   # IMPASSABLE_SOUTH
        96: "D",    # NON_ANIMATED_DOOR
        101: "SO",  # SOUTH_ARROW_WARP
        105: "D",   # ANIMATED_DOOR
        134: "TE",  # TELEVISION
    }

    width = len(tiles[0])
    # Header with column numbers
    header = " " + " ".join(f"{i:2}" for i in range(width))
    output = [
        f"=== {title} ===",
        "Format: (MetatileID, Behavior, X, Y)",
        f"Map dimensions: {len(tiles)}x{width}",
        "",
        "--- TRAVERSABILITY MAP ---",
        header,
        " " + "-" * (len(header) - 4),
    ]

    # The player marker is always placed at the fixed cell (7, 7); the
    # position argument only decides whether the marker is drawn at all.
    mark_player = bool(position) and len(position) >= 2

    for row_idx, row in enumerate(tiles):
        symbols = []
        for col_idx, tile in enumerate(row):
            if len(tile) < 4:
                symbols.append("?")
                continue

            # Slice before unpacking so tiles that carry extra trailing
            # fields do not raise ValueError.
            _tile_id, behavior, collision, _elevation = tile[:4]
            behavior_val = behavior.value if hasattr(behavior, "value") else behavior

            if behavior_val == 0:  # NORMAL
                symbol = "." if collision == 0 else "#"
            else:
                # Unknown behaviors default to walkable.
                symbol = behavior_symbols.get(behavior_val, ".")

            if mark_player and row_idx == 7 and col_idx == 7:
                symbol = "P"

            symbols.append(symbol)

        # Row label followed by the space-separated symbols
        output.append(f"{row_idx:2}: " + " ".join(symbols))

    return "\n".join(output)
90
+
91
+ def save_map_output(self, tiles, output_file, title, location, position):
92
+ """Save map output to file"""
93
+ formatted_output = self.format_map_for_comparison(tiles, title, location, position)
94
+
95
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
96
+ with open(output_file, 'w') as f:
97
+ f.write(formatted_output)
98
+
99
+ return formatted_output
100
+
101
def compare_with_ground_truth(self, output_content, ground_truth_file):
    """Compare the traversability rows of ``output_content`` with a reference file.

    Only the map rows (lines containing ``:`` after the
    ``--- TRAVERSABILITY MAP ---`` marker) take part in the comparison.

    Returns:
        A ``(matches, message)`` tuple. ``matches`` is False when the
        reference file is missing or the rows differ; in the latter case the
        message includes a line-level similarity percentage and both maps.
    """
    if not os.path.exists(ground_truth_file):
        return False, f"Ground truth file not found: {ground_truth_file}"

    with open(ground_truth_file, 'r') as handle:
        ground_truth_content = handle.read()

    def extract_traversability_map(content):
        """Collect the map rows that follow the traversability marker."""
        rows = []
        seen_marker = False
        for text in content.split('\n'):
            if "--- TRAVERSABILITY MAP ---" in text:
                seen_marker = True
                continue
            # Ignore everything before the marker, blank lines and banners.
            if not seen_marker or not text.strip() or text.startswith('='):
                continue
            if text.strip().startswith('---') or 'Map dimensions' in text:
                continue
            if ':' in text:  # Map row
                rows.append(text)
        return '\n'.join(rows)

    output_map = extract_traversability_map(output_content)
    ground_truth_map = extract_traversability_map(ground_truth_content)

    if output_map.strip() == ground_truth_map.strip():
        return True, "Maps match ground truth perfectly"

    # Similarity: share of rows that are identical at the same index,
    # relative to the longer of the two maps.
    actual_rows = output_map.strip().split('\n')
    expected_rows = ground_truth_map.strip().split('\n')
    total_rows = max(len(actual_rows), len(expected_rows))
    identical_rows = sum(
        1 for actual, expected in zip(actual_rows, expected_rows) if actual == expected
    )
    similarity = (identical_rows / total_rows * 100) if total_rows > 0 else 0

    return False, f"Maps don't match (similarity: {similarity:.1f}%)\nExpected:\n{ground_truth_map}\nActual:\n{output_map}"
149
+
150
def test_direct_emulator_house_map(self, output_dir, ground_truth_dir):
    """Check the house map read from a direct emulator against ground truth.

    The generated map is always written to ``output_dir``. The test is
    skipped, not failed, when the ground-truth file does not exist yet.
    """
    # Initialize direct emulator
    emu = EmeraldEmulator('Emerald-GBAdvance/rom.gba', headless=True, sound=False)
    emu.initialize()

    try:
        # Loading the state happens inside the try so the emulator is
        # stopped even when the state file cannot be loaded.
        emu.load_state('tests/states/house.state')

        # Get house map
        state = emu.memory_reader.get_comprehensive_state()
        player = state['player']
        location = player['location']
        position = player['position']
        tiles = state['map']['tiles']

        # Save output
        output_file = output_dir / "direct_emulator_house.txt"
        output_content = self.save_map_output(
            tiles, output_file,
            f"House - {location}", location, position
        )

        # Compare with ground truth
        ground_truth_file = ground_truth_dir / "house_direct_emulator.txt"
        matches, message = self.compare_with_ground_truth(output_content, ground_truth_file)

        print(f"Direct emulator house map saved to: {output_file}")
        print(f"Comparison result: {message}")

        # Allow test to pass even if ground truth doesn't exist yet
        if not os.path.exists(ground_truth_file):
            pytest.skip(f"Ground truth file not found: {ground_truth_file}")

        assert matches, f"Direct emulator house map doesn't match ground truth: {message}"

    finally:
        emu.stop()
186
+
187
def test_direct_emulator_outside_map(self, output_dir, ground_truth_dir):
    """Check the outside map read from a direct emulator against ground truth.

    Starts from the house state, presses ``down`` three times, then reads the
    map. The generated map is always written to ``output_dir``. The test is
    skipped, not failed, when the ground-truth file does not exist yet.
    """
    # Initialize direct emulator
    emu = EmeraldEmulator('Emerald-GBAdvance/rom.gba', headless=True, sound=False)
    emu.initialize()

    try:
        # Loading the state happens inside the try so the emulator is
        # stopped even when the state file cannot be loaded.
        emu.load_state('tests/states/house.state')

        # Move outside
        for _ in range(3):
            emu.press_buttons(['down'], hold_frames=25, release_frames=25)
            time.sleep(0.2)

        # Wait for transition to complete
        time.sleep(0.5)

        # Get outside map
        state = emu.memory_reader.get_comprehensive_state()
        player = state['player']
        location = player['location']
        position = player['position']
        tiles = state['map']['tiles']

        # Save output
        output_file = output_dir / "direct_emulator_outside.txt"
        output_content = self.save_map_output(
            tiles, output_file,
            f"Outside - {location}", location, position
        )

        # Compare with ground truth
        ground_truth_file = ground_truth_dir / "outside_direct_emulator.txt"
        matches, message = self.compare_with_ground_truth(output_content, ground_truth_file)

        print(f"Direct emulator outside map saved to: {output_file}")
        print(f"Comparison result: {message}")

        # Allow test to pass even if ground truth doesn't exist yet
        if not os.path.exists(ground_truth_file):
            pytest.skip(f"Ground truth file not found: {ground_truth_file}")

        assert matches, f"Direct emulator outside map doesn't match ground truth: {message}"

    finally:
        emu.stop()
231
+
232
def test_server_house_map(self, output_dir, ground_truth_dir):
    """Check the house map served over HTTP against ground truth.

    Launches ``python -m server.app`` on port 8101 with the house state,
    polls ``/status`` until it answers 200, reads ``/state`` and compares the
    map. The server process is terminated afterwards. The test is skipped,
    not failed, when the ground-truth file does not exist yet.
    """
    # Kill any existing server
    os.system("pkill -f 'server.app' 2>/dev/null")
    time.sleep(2)

    # Start server. Its output is never read by this test, so it is sent to
    # DEVNULL: an unread PIPE can fill up and block the child process.
    server_cmd = ["python", "-m", "server.app", "--load-state", "tests/states/house.state", "--port", "8101", "--manual"]
    server_process = subprocess.Popen(
        server_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True
    )
    server_url = "http://127.0.0.1:8101"

    try:
        # Wait for server startup (up to 20 attempts, one second apart)
        for _ in range(20):
            try:
                if requests.get(f"{server_url}/status", timeout=2).status_code == 200:
                    break
            except requests.exceptions.RequestException:
                pass
            # Sleep after a refused connection and after a non-200 answer,
            # so the poll never spins.
            time.sleep(1)
        else:
            pytest.fail("Server failed to start")

        # Get house map
        response = requests.get(f"{server_url}/state", timeout=5)
        state = response.json()

        player = state['player']
        location = player['location']
        position = (player['position']['x'], player['position']['y'])
        tiles = state['map']['tiles']

        # Save output
        output_file = output_dir / "server_house.txt"
        output_content = self.save_map_output(
            tiles, output_file,
            f"House - {location}", location, position
        )

        # Compare with ground truth
        ground_truth_file = ground_truth_dir / "house_server.txt"
        matches, message = self.compare_with_ground_truth(output_content, ground_truth_file)

        print(f"Server house map saved to: {output_file}")
        print(f"Comparison result: {message}")

        # Allow test to pass even if ground truth doesn't exist yet
        if not os.path.exists(ground_truth_file):
            pytest.skip(f"Ground truth file not found: {ground_truth_file}")

        assert matches, f"Server house map doesn't match ground truth: {message}"

    finally:
        server_process.terminate()
        try:
            server_process.wait(timeout=3)
        except subprocess.TimeoutExpired:
            server_process.kill()
            # Reap the killed process so it does not linger as a zombie.
            server_process.wait()
289
+
290
def test_server_outside_map(self, output_dir, ground_truth_dir):
    """Check the outside map served over HTTP against ground truth.

    This test does not launch a server: it expects one to be listening
    already on ``http://127.0.0.1:8000`` (the launch code that used port 8102
    was disabled). It sends ``DOWN`` presses until the player reaches
    position (5, 11) or is outdoors at or below that row, then reads
    ``/state`` and compares the map. The test is skipped, not failed, when
    the ground-truth file does not exist yet.
    """
    # No process is started here, so there is nothing to terminate unless a
    # future change assigns one. Keeping the name defined stops the cleanup
    # below from raising NameError and masking the real test outcome.
    server_process = None
    server_url = "http://127.0.0.1:8000"

    try:
        # Wait for server startup (up to 20 attempts)
        for _ in range(20):
            try:
                response = requests.get(f"{server_url}/status", timeout=2)
                if response.status_code == 200:
                    break
            except requests.exceptions.RequestException:
                time.sleep(1)
        else:
            pytest.fail("Server failed to start")

        # Enhanced movement to reach position (5,11) like direct emulator
        target_pos = (5, 11)
        max_moves = 6
        for _ in range(max_moves):
            try:
                # Check current position
                response = requests.get(f"{server_url}/state", timeout=10)
                state = response.json()
                player = state['player']
                current_pos = (player['position']['x'], player['position']['y'])
                current_location = player['location']

                # If we've reached the target position, stop
                if current_pos == target_pos:
                    break

                outdoors = "LITTLEROOT TOWN" in current_location and "HOUSE" not in current_location
                # Outdoors and already at/below the target row: done.
                if outdoors and current_pos[1] >= target_pos[1]:
                    break

                # Otherwise (still in the house, or outdoors above the
                # target row) keep moving down.
                if outdoors:
                    print('Posting action down')
                requests.post(f"{server_url}/action", json={"buttons": ["DOWN"]}, timeout=5)
                time.sleep(0.2)

            except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
                # Best effort: a failed request or malformed state costs
                # one move attempt; back off briefly and retry.
                time.sleep(0.5)

        # NOTE: calls to the /debug/clear_cache and
        # /debug/force_buffer_redetection endpoints were disabled here.

        # Get outside map
        response = requests.get(f"{server_url}/state", timeout=15)
        state = response.json()

        location = state['player']['location']
        position = (state['player']['position']['x'], state['player']['position']['y'])
        tiles = state['map']['tiles']

        # Save output
        output_file = output_dir / "server_outside.txt"
        output_content = self.save_map_output(
            tiles, output_file,
            f"Outside - {location}", location, position
        )

        # Compare with ground truth
        ground_truth_file = ground_truth_dir / "outside_server.txt"
        matches, message = self.compare_with_ground_truth(output_content, ground_truth_file)

        print(f"Server outside map saved to: {output_file}")
        print(f"Comparison result: {message}")

        # Allow test to pass even if ground truth doesn't exist yet
        if not os.path.exists(ground_truth_file):
            pytest.skip(f"Ground truth file not found: {ground_truth_file}")

        assert matches, f"Server outside map doesn't match ground truth: {message}"

    finally:
        if server_process is not None:
            server_process.terminate()
            try:
                server_process.wait(timeout=3)
            except subprocess.TimeoutExpired:
                server_process.kill()
393
+
394
+ def test_cross_comparison_house(self, output_dir):
395
+ """Test that direct emulator and server produce identical house maps"""
396
+ # This test runs after the individual tests and compares their outputs
397
+ direct_file = output_dir / "direct_emulator_house.txt"
398
+ server_file = output_dir / "server_house.txt"
399
+
400
+ if not direct_file.exists() or not server_file.exists():
401
+ pytest.skip("Individual map tests must run first")
402
+
403
+ with open(direct_file, 'r') as f:
404
+ direct_content = f.read()
405
+
406
+ with open(server_file, 'r') as f:
407
+ server_content = f.read()
408
+
409
+ # Compare the traversability maps
410
+ def extract_traversability_map(content):
411
+ lines = content.split('\n')
412
+ map_lines = []
413
+ in_map_section = False
414
+
415
+ for line in lines:
416
+ if "--- TRAVERSABILITY MAP ---" in line:
417
+ in_map_section = True
418
+ continue
419
+ elif in_map_section and line.strip() and ':' in line:
420
+ map_lines.append(line)
421
+
422
+ return '\n'.join(map_lines)
423
+
424
+ direct_map = extract_traversability_map(direct_content)
425
+ server_map = extract_traversability_map(server_content)
426
+
427
+ assert direct_map == server_map, f"House maps don't match between direct emulator and server:\nDirect:\n{direct_map}\nServer:\n{server_map}"
428
+
429
+ def test_cross_comparison_outside(self, output_dir):
430
+ """Test that direct emulator and server produce identical outside maps"""
431
+ # This test runs after the individual tests and compares their outputs
432
+ direct_file = output_dir / "direct_emulator_outside.txt"
433
+ server_file = output_dir / "server_outside.txt"
434
+
435
+ if not direct_file.exists() or not server_file.exists():
436
+ pytest.skip("Individual map tests must run first")
437
+
438
+ with open(direct_file, 'r') as f:
439
+ direct_content = f.read()
440
+
441
+ with open(server_file, 'r') as f:
442
+ server_content = f.read()
443
+
444
+ # Compare the traversability maps
445
+ def extract_traversability_map(content):
446
+ lines = content.split('\n')
447
+ map_lines = []
448
+ in_map_section = False
449
+
450
+ for line in lines:
451
+ if "--- TRAVERSABILITY MAP ---" in line:
452
+ in_map_section = True
453
+ continue
454
+ elif in_map_section and line.strip() and ':' in line:
455
+ map_lines.append(line)
456
+
457
+ return '\n'.join(map_lines)
458
+
459
+ direct_map = extract_traversability_map(direct_content)
460
+ server_map = extract_traversability_map(server_content)
461
+
462
+ assert direct_map == server_map, f"Outside maps don't match between direct emulator and server:\nDirect:\n{direct_map}\nServer:\n{server_map}"
463
+
464
+
465
if __name__ == "__main__":
    # Manual entry point: run this file's tests verbosely with output
    # capture disabled, and use pytest's result as the process exit status.
    import sys

    exit_status = pytest.main([__file__, "-v", "-s"])
    sys.exit(exit_status)