synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (226) hide show
  1. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
  2. examples/swe/task_app/grpo_swe_mini.py +55 -26
  3. examples/swe/task_app/hosted/rollout.py +40 -0
  4. examples/swe/task_app/hosted/test_service.py +5 -6
  5. examples/task_apps/TESTING.md +275 -0
  6. examples/task_apps/__init__.py +0 -0
  7. examples/task_apps/crafter/__init__.py +0 -0
  8. examples/task_apps/crafter/task_app/__init__.py +2 -0
  9. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
  10. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  11. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  12. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
  13. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
  14. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  15. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  16. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  17. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  18. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  19. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  20. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  21. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  22. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  71. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  72. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  73. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  74. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  75. examples/task_apps/enron/__init__.py +1 -0
  76. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  77. examples/task_apps/enron/task_app/README.md +14 -0
  78. examples/task_apps/enron/task_app/__init__.py +1 -0
  79. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  80. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  81. examples/task_apps/enron/tests/__init__.py +2 -0
  82. examples/task_apps/enron/tests/conftest.py +115 -0
  83. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  84. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  85. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  86. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  87. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  88. examples/task_apps/math/__init__.py +0 -0
  89. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  90. examples/task_apps/pokemon_battle/__init__.py +2 -0
  91. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  92. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  93. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  94. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  95. examples/task_apps/pokemon_red/README.md +357 -0
  96. examples/task_apps/pokemon_red/__init__.py +3 -0
  97. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  98. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  99. examples/task_apps/pokemon_red/task_app.py +606 -0
  100. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  101. examples/task_apps/sokoban/README.md +307 -0
  102. examples/task_apps/sokoban/__init__.py +3 -0
  103. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  104. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  105. examples/task_apps/sokoban/task_app.py +1058 -0
  106. examples/task_apps/sokoban/tests/__init__.py +2 -0
  107. examples/task_apps/sokoban/tests/conftest.py +113 -0
  108. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  109. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  110. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  111. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  112. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  113. examples/task_apps/verilog/__init__.py +1 -0
  114. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  115. examples/task_apps/verilog/task_app/README.md +12 -0
  116. examples/task_apps/verilog/task_app/__init__.py +1 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  118. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  119. examples/task_apps/verilog/tests/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/conftest.py +115 -0
  121. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  122. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  123. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  124. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  125. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  126. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  127. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  128. examples/workflows/__init__.py +0 -0
  129. examples/workflows/math_rl/__init__.py +0 -0
  130. examples/workflows/math_rl/download_dataset.py +80 -0
  131. synth_ai/__init__.py +2 -2
  132. synth_ai/api/train/builders.py +25 -11
  133. synth_ai/api/train/cli.py +12 -6
  134. synth_ai/api/train/configs/__init__.py +10 -10
  135. synth_ai/api/train/configs/rl.py +5 -4
  136. synth_ai/api/train/configs/sft.py +4 -3
  137. synth_ai/api/train/env_resolver.py +5 -2
  138. synth_ai/api/train/supported_algos.py +10 -5
  139. synth_ai/api/train/utils.py +7 -4
  140. synth_ai/cli/__init__.py +7 -51
  141. synth_ai/cli/_storage.py +4 -3
  142. synth_ai/cli/_validate_task_app.py +11 -0
  143. synth_ai/cli/balance.py +4 -3
  144. synth_ai/cli/calc.py +2 -2
  145. synth_ai/cli/demo.py +14 -7
  146. synth_ai/cli/legacy_root_backup.py +1 -1
  147. synth_ai/cli/rl_demo.py +8 -7
  148. synth_ai/cli/root.py +0 -97
  149. synth_ai/cli/task_apps.py +1707 -186
  150. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  151. synth_ai/environments/examples/enron/engine.py +7 -2
  152. synth_ai/environments/examples/enron/environment.py +68 -0
  153. synth_ai/environments/examples/red/engine.py +27 -0
  154. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  155. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  156. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  157. synth_ai/environments/examples/red/environment.py +60 -0
  158. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  159. synth_ai/environments/examples/verilog/engine.py +30 -4
  160. synth_ai/evals/client.py +58 -61
  161. synth_ai/jobs/client.py +16 -4
  162. synth_ai/judge_schemas.py +16 -16
  163. synth_ai/py.typed +0 -0
  164. synth_ai/task/__init__.py +14 -5
  165. synth_ai/task/contracts.py +124 -38
  166. synth_ai/task/proxy.py +48 -56
  167. synth_ai/task/rubrics/__init__.py +53 -0
  168. synth_ai/task/rubrics/loaders.py +133 -0
  169. synth_ai/task/rubrics/models.py +57 -0
  170. synth_ai/task/rubrics/scoring.py +113 -0
  171. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  172. synth_ai/task/server.py +8 -7
  173. synth_ai/task/validators.py +269 -6
  174. synth_ai/tracing_v3/decorators.py +7 -3
  175. synth_ai/tracing_v3/replica_sync.py +4 -4
  176. synth_ai/tracing_v3/serialization.py +5 -5
  177. synth_ai/tracing_v3/trace_utils.py +317 -0
  178. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  179. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  180. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
  181. examples/agora_ex/README_MoE.md +0 -224
  182. examples/agora_ex/__init__.py +0 -7
  183. examples/agora_ex/agora_ex.py +0 -65
  184. examples/agora_ex/agora_ex_task_app.py +0 -590
  185. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  186. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  187. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  188. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  189. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  190. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  191. synth_ai/rubrics/__init__.py +0 -22
  192. synth_ai/task/rubrics.py +0 -219
  193. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  194. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  195. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  196. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  197. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  214. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  215. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  216. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  217. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  218. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  219. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  222. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  223. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  224. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
  225. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  226. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pytest tests for server-based map validation
4
+ Tests different game states and saves reference outputs for regression testing
5
+ """
6
+
7
+ import pytest
8
+ import requests
9
+ import time
10
+ import subprocess
11
+ import os
12
+ import json
13
+ from pathlib import Path
14
+ from tests.test_memory_map import format_map_data
15
+
16
+
17
class ServerMapTester:
    """Helper class for testing server-based map reading.

    Launches ``python -m server.app`` as a subprocess on a fixed port and
    talks to it over HTTP (``/status``, ``/state`` and ``/action``).
    """

    def __init__(self, port=8010):
        self.port = port
        self.server_url = f"http://127.0.0.1:{port}"
        # Handle of the running server subprocess, or None when no server is up.
        self.server_process = None

    def start_server(self, state_file):
        """Start server with a specific state file.

        Any server previously started by this helper is stopped first.

        Args:
            state_file: Path passed to the server's ``--load-state`` option.

        Returns:
            True once ``GET /status`` answers 200; False if the server exits
            early or never becomes ready within 30 attempts (the process is
            stopped before returning False).
        """
        self.stop_server()  # Ensure clean state

        server_cmd = [
            "python", "-m", "server.app",
            "--load-state", state_file,
            "--port", str(self.port),
            "--manual",
        ]

        # Nothing in this helper ever reads the child's output, so discard it:
        # an unread PIPE can fill up and block the server mid-test.
        self.server_process = subprocess.Popen(
            server_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Wait for server to start
        for _ in range(30):
            if self.server_process.poll() is not None:
                # The server already exited; further polling cannot succeed.
                break
            try:
                response = requests.get(f"{self.server_url}/status", timeout=2)
                if response.status_code == 200:
                    return True
            except requests.exceptions.RequestException:
                pass
            # Back off after every unsuccessful attempt, including non-200
            # answers, so the retries are spread over time.
            time.sleep(1)

        self.stop_server()
        return False

    def stop_server(self):
        """Stop the server process (no-op when none is running)."""
        if self.server_process:
            self.server_process.terminate()
            try:
                self.server_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Did not exit after terminate(); force it and reap the child.
                self.server_process.kill()
                self.server_process.wait()
            self.server_process = None

    def get_map_data(self):
        """Get current map data from server.

        Returns:
            A dict with ``location``, ``position`` and ``tiles`` taken from the
            ``/state`` response, or None when the server answers with a
            non-200 status. Request or parsing errors are reported through
            ``pytest.fail``.
        """
        try:
            response = requests.get(f"{self.server_url}/state", timeout=10)
            if response.status_code == 200:
                state = response.json()
                return {
                    'location': state['player']['location'],
                    'position': state['player']['position'],
                    'tiles': state['map']['tiles'],
                }
        except Exception as e:
            pytest.fail(f"Failed to get map data: {e}")
        return None

    def execute_actions(self, actions):
        """Execute a sequence of actions.

        Each action is POSTed as JSON to ``/action``; a non-200 answer or a
        request error fails the current test through ``pytest.fail``.
        """
        for action in actions:
            try:
                response = requests.post(f"{self.server_url}/action", json=action, timeout=5)
                if response.status_code != 200:
                    pytest.fail(f"Action failed: {action}, status: {response.status_code}")
                time.sleep(0.3)  # Allow action to process
            except Exception as e:
                pytest.fail(f"Failed to execute action {action}: {e}")
91
+
92
+
93
@pytest.fixture
def server_tester():
    """Yield a fresh ServerMapTester and stop its server after the test."""
    map_tester = ServerMapTester()
    yield map_tester
    # Teardown: make sure no server subprocess outlives the test.
    map_tester.stop_server()
99
+
100
+
101
def save_reference_map(location_name, map_data, reference_dir):
    """Save map data as reference for future comparisons.

    Args:
        location_name: Location label used to derive the file name (spaces
            become underscores, apostrophes are dropped, result lower-cased).
        map_data: Dict with ``location``, ``position`` and ``tiles`` keys.
        reference_dir: Directory (str or Path) that receives the JSON file;
            it is created, including missing parents, when absent.

    Returns:
        Path of the written ``<name>_reference.json`` file.
    """
    reference_dir = Path(reference_dir)
    # parents=True so a missing intermediate directory does not abort the save.
    reference_dir.mkdir(parents=True, exist_ok=True)

    # Clean filename
    slug = location_name.replace(' ', '_').replace("'", '').lower()
    reference_file = reference_dir / f"{slug}_reference.json"

    reference_data = {
        'location': map_data['location'],
        'position': map_data['position'],
        'tiles': map_data['tiles'],
        # Rendering of the tiles as produced by format_map_data, stored
        # alongside the raw tile data.
        'formatted_map': format_map_data(map_data['tiles'], map_data['location']),
    }

    with open(reference_file, 'w') as f:
        json.dump(reference_data, f, indent=2)

    return reference_file
123
+
124
+
125
def compare_with_reference(current_map, reference_file):
    """Compare current map with saved reference.

    Args:
        current_map: Dict with at least ``location`` and ``tiles`` keys, where
            ``tiles`` is a list of rows.
        reference_file: Path (or str) of a JSON reference with the same keys.

    Returns:
        A ``(matches, message)`` tuple. ``matches`` is False when the file is
        missing, the location differs, the height or first-row width differs,
        or more than 5% of the tiles differ.
    """
    reference_file = Path(reference_file)
    if not reference_file.exists():
        return False, f"Reference file {reference_file} does not exist"

    with open(reference_file, 'r') as f:
        reference = json.load(f)

    # Compare location
    if current_map['location'] != reference['location']:
        return False, f"Location mismatch: {current_map['location']} != {reference['location']}"

    # Compare map dimensions
    current_tiles = current_map['tiles']
    reference_tiles = reference['tiles']

    if len(current_tiles) != len(reference_tiles):
        return False, f"Height mismatch: {len(current_tiles)} != {len(reference_tiles)}"

    # Heights are equal at this point, so both grids are empty or both are
    # non-empty; an empty grid has width 0 instead of raising IndexError.
    current_width = len(current_tiles[0]) if current_tiles else 0
    reference_width = len(reference_tiles[0]) if reference_tiles else 0
    if current_width != reference_width:
        return False, f"Width mismatch: {current_width} != {reference_width}"

    # Compare tile data (allow some tolerance for minor differences)
    total_tiles = len(current_tiles) * current_width
    differences = sum(
        1
        for current_row, reference_row in zip(current_tiles, reference_tiles)
        for current_tile, reference_tile in zip(current_row, reference_row)
        if current_tile != reference_tile
    )

    difference_ratio = differences / total_tiles if total_tiles > 0 else 0

    # Allow up to 5% differences for minor variations
    if difference_ratio > 0.05:
        return False, f"Too many tile differences: {differences}/{total_tiles} ({difference_ratio:.1%})"

    return True, f"Maps match (differences: {differences}/{total_tiles}, {difference_ratio:.1%})"
163
+
164
+
165
class TestServerMapValidation:
    """Test server-based map reading for different scenarios.

    Each test starts the server from a saved state through the
    ``server_tester`` fixture. The first three tests also write a reference
    JSON file under ``tests/map_references``, which the regression test then
    compares fresh readings against.
    """

    def test_house_state_map(self, server_tester):
        """Test map reading from house state"""
        assert server_tester.start_server("tests/states/house.state"), "Failed to start server"

        map_data = server_tester.get_map_data()
        assert map_data is not None, "Failed to get map data"

        # Validate basic properties
        assert "BRENDAN" in map_data['location'].upper(), f"Unexpected location: {map_data['location']}"
        assert "HOUSE" in map_data['location'].upper(), f"Not in house: {map_data['location']}"
        assert len(map_data['tiles']) > 0, "Empty map tiles"

        # Save as reference
        reference_file = save_reference_map(map_data['location'], map_data, "tests/map_references")
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ House state map validated and saved to {reference_file}")

    def test_upstairs_state_map(self, server_tester):
        """Test map reading from upstairs state"""
        assert server_tester.start_server("tests/states/upstairs.state"), "Failed to start server"

        map_data = server_tester.get_map_data()
        assert map_data is not None, "Failed to get map data"

        # Validate upstairs properties
        assert "2F" in map_data['location'] or "UPSTAIRS" in map_data['location'].upper(), f"Not upstairs: {map_data['location']}"

        tiles = map_data['tiles']
        assert len(tiles) >= 10, "Map too small"
        assert len(tiles[0]) >= 10, "Map too narrow"

        # Check for reasonable tile diversity (indoor areas should have various behavior types)
        total_tiles = sum(len(row) for row in tiles)
        behavior_counts = {}
        for row in tiles:
            for tile in row:
                # Tiles with fewer than two fields carry no behavior entry and are skipped.
                if len(tile) >= 2:
                    behavior = tile[1]
                    behavior_counts[behavior] = behavior_counts.get(behavior, 0) + 1

        # Should have at least 3 different behavior types for a proper indoor area
        unique_behaviors = len(behavior_counts)
        assert unique_behaviors >= 3, f"Too few behavior types: {unique_behaviors} (behaviors: {list(behavior_counts.keys())})"

        # Should not be dominated by a single behavior type (>90%)
        max_behavior_count = max(behavior_counts.values()) if behavior_counts else 0
        dominance_ratio = max_behavior_count / total_tiles if total_tiles > 0 else 0
        assert dominance_ratio < 0.9, f"Single behavior dominates: {dominance_ratio:.1%}"

        # Save as reference
        reference_file = save_reference_map(map_data['location'], map_data, "tests/map_references")
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ Upstairs state map validated and saved to {reference_file}")

    def test_house_to_outside_transition(self, server_tester):
        """Test area transition from house to outside"""
        assert server_tester.start_server("tests/states/house.state"), "Failed to start server"

        # Get initial house map
        house_map = server_tester.get_map_data()
        # get_map_data returns None on a non-200 answer; check it before
        # subscripting so the failure is reported clearly, not as a TypeError.
        assert house_map is not None, "Failed to get house map"
        assert "HOUSE" in house_map['location'].upper(), f"Not in house: {house_map['location']}"

        # Move outside
        actions = [{"buttons": ["down"]} for _ in range(3)]
        server_tester.execute_actions(actions)

        # Get outside map
        outside_map = server_tester.get_map_data()
        assert outside_map is not None, "Failed to get outside map"
        assert "TOWN" in outside_map['location'].upper(), f"Not in town: {outside_map['location']}"

        # Validate outside map quality
        tiles = outside_map['tiles']
        total_tiles = sum(len(row) for row in tiles)
        unknown_tiles = sum(1 for row in tiles for tile in row if len(tile) >= 2 and tile[1] == 0)  # UNKNOWN = 0

        unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0

        # Log the unknown ratio for debugging
        print(f"Outside map unknown ratio: {unknown_ratio:.1%}")

        # If too many unknown tiles, this indicates the area transition bug.
        # This is only reported, not asserted: the reference is saved either
        # way to track the issue.
        if unknown_ratio > 0.3:
            print(f"⚠️ DETECTED AREA TRANSITION ISSUE: {unknown_ratio:.1%} unknown tiles")
            print("This test demonstrates that the area transition bug still occurs sometimes")
        else:
            print(f"✅ Area transition successful: {unknown_ratio:.1%} unknown tiles")

        # Save as reference
        reference_file = save_reference_map(outside_map['location'], outside_map, "tests/map_references")
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ House-to-outside transition validated and saved to {reference_file}")

    def test_regression_against_references(self, server_tester):
        """Test current maps against saved references"""
        reference_dir = Path("tests/map_references")
        if not reference_dir.exists():
            pytest.skip("No reference files exist yet - run other tests first")

        reference_files = list(reference_dir.glob("*_reference.json"))
        if not reference_files:
            pytest.skip("No reference files found")

        # Test each reference
        for reference_file in reference_files:
            with open(reference_file, 'r') as f:
                reference = json.load(f)

            location = reference['location']

            # Determine which state file to use based on location
            if "BRENDAN" in location.upper() and "HOUSE" in location.upper() and "2F" not in location:
                state_file = "tests/states/house.state"
            elif "2F" in location or "UPSTAIRS" in location.upper():
                state_file = "tests/states/upstairs.state"
            else:
                # For outdoor locations, start from house and transition
                state_file = "tests/states/house.state"

            assert server_tester.start_server(state_file), f"Failed to start server for {location}"

            # If outdoor location, perform transition
            if "TOWN" in location.upper():
                actions = [{"buttons": ["down"]} for _ in range(3)]
                server_tester.execute_actions(actions)

            current_map = server_tester.get_map_data()
            assert current_map is not None, f"Failed to get map for {location}"

            # Compare with reference
            matches, message = compare_with_reference(current_map, reference_file)
            assert matches, f"Map regression for {location}: {message}"

            print(f"✅ Regression test passed for {location}: {message}")
306
+
307
+
308
if __name__ == "__main__":
    # Manual development entry point: run this file's tests with the "-v" and
    # "-s" flags and use pytest's result as the process exit status.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))
@@ -0,0 +1,259 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test for torchic state and milestone reading
4
+
5
+ This test verifies that:
6
+ 1. The torchic state loads correctly
7
+ 2. The state contains the expected data (player on Route 101, has Torchic)
8
+ 3. The milestones are correctly detected and include Littleroot Town
9
+ """
10
+
11
+ import pytest
12
+ import subprocess
13
+ import time
14
+ import requests
15
+ import json
16
+ import os
17
+
18
class ServerManager:
    """Manages server startup and shutdown for tests.

    The server is launched as a child process (``python -m server.app``)
    and is contacted over HTTP at ``http://localhost:8000``.
    """

    def __init__(self):
        # Handle of the running child process; None while no server is active.
        self.server_process = None

    def start_server(self, state_file):
        """Start the server with a specific state file.

        Args:
            state_file: Value passed to the server's ``--load-state`` option.

        Returns:
            True if ``/status`` answered with HTTP 200 after the startup
            wait, False if it answered with another status or if launching
            or contacting the server failed.
        """
        print(f"🚀 Starting server with state: {state_file}")
        cmd = ["python", "-m", "server.app", "--manual", "--load-state", state_file]

        try:
            # Nothing ever reads the child's output, so discard it instead
            # of piping it: an unread pipe can fill up and block the child.
            self.server_process = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Wait for server to start
            print("⏳ Waiting for server to start...")
            time.sleep(5)

            # Test if server is responding
            response = requests.get("http://localhost:8000/status", timeout=5)
        except (OSError, subprocess.SubprocessError, requests.RequestException) as e:
            print(f"❌ Failed to start server: {e}")
            return False

        if response.status_code == 200:
            print("✅ Server started successfully")
            return True

        print(f"❌ Server not responding: {response.status_code}")
        return False

    def stop_server(self):
        """Stop the server cleanly.

        Asks the server to stop via ``POST /stop`` (best effort), then
        terminates the child process, escalating to ``kill`` if it has not
        exited within five seconds. Does nothing when no server is tracked.
        """
        process = self.server_process
        if not process:
            return

        print("🛑 Stopping server...")
        try:
            # Try graceful shutdown first
            requests.post("http://localhost:8000/stop", timeout=2)
            time.sleep(1)
        except requests.RequestException:
            # Best effort only: the process is terminated below regardless.
            pass

        # Force terminate if still running
        try:
            process.terminate()
            process.wait(timeout=5)
            print("✅ Server stopped gracefully")
        except subprocess.TimeoutExpired:
            print("⚠️ Server didn't stop gracefully, force killing...")
            process.kill()
            process.wait()
            print("✅ Server force killed")
        finally:
            # Forget the handle so a repeated stop_server() call is a no-op.
            self.server_process = None
74
+
75
@pytest.fixture(scope="session", autouse=True)
def check_environment():
    """Skip when the Torchic save state file is not present on disk."""
    torchic_state = "tests/states/torchic.state"
    if os.path.exists(torchic_state):
        print(f"✅ Found torchic state file: {torchic_state}")
    else:
        pytest.skip(f"Torchic state file not found: {torchic_state}")
83
+
84
def test_torchic_state_loading():
    """Verify the torchic state loads with the expected player and party.

    Starts a server from ``tests/states/torchic.state``, fetches ``/state``
    and checks the payload structure, the player's location (Route 101)
    and that the first party member is a Torchic. The server is always
    stopped afterwards.
    """
    server_manager = ServerManager()

    try:
        # Start server with torchic state
        started = server_manager.start_server("tests/states/torchic.state")
        assert started, "Failed to start server"

        # Get comprehensive state
        response = requests.get("http://localhost:8000/state", timeout=10)
        assert response.status_code == 200, f"Failed to get state: {response.status_code}"
        state_data = response.json()

        # Top-level sections of the state payload
        for key, problem in (
            ("player", "State missing player data"),
            ("game", "State missing game data"),
            ("visual", "State missing visual data"),
        ):
            assert key in state_data, problem

        # Fields of the player section
        player = state_data["player"]
        for key, problem in (
            ("name", "Player data missing name"),
            ("location", "Player data missing location"),
            ("position", "Player data missing position"),
            ("party", "Player data missing party"),
        ):
            assert key in player, problem

        # The torchic state places the player in Route 101
        location = player["location"]
        print(f"📍 Player location: {location}")
        assert "ROUTE 101" in location.upper(), f"Expected player to be in Route 101, but found: {location}"

        # Party must be a non-empty list
        party = player["party"]
        assert isinstance(party, list), "Party should be a list"
        assert len(party) > 0, "Party should not be empty"

        # First party member must be Torchic
        first_pokemon = party[0]
        assert "species_name" in first_pokemon, "Pokemon missing species_name"
        species_name = first_pokemon["species_name"]
        print(f"🔥 First Pokemon: {species_name}")
        assert species_name.upper() == "TORCHIC", f"Expected Torchic, but found: {species_name}"

        # Remaining fields of the Pokemon record
        for key, problem in (
            ("level", "Pokemon missing level"),
            ("current_hp", "Pokemon missing current_hp"),
            ("max_hp", "Pokemon missing max_hp"),
            ("moves", "Pokemon missing moves"),
        ):
            assert key in first_pokemon, problem

        print(f"✅ Torchic level: {first_pokemon['level']}")
        print(f"✅ Torchic HP: {first_pokemon['current_hp']}/{first_pokemon['max_hp']}")
        print(f"✅ Torchic moves: {first_pokemon['moves']}")

    finally:
        server_manager.stop_server()
139
+
140
def test_torchic_milestones():
    """Test that milestones are correctly detected for torchic state.

    Starts a server from ``tests/states/torchic.state``, fetches
    ``/milestones`` and checks the payload structure, that a Littleroot
    milestone is listed, that the current location is Route 101, and the
    completion status of a fixed set of milestones. The server is always
    stopped afterwards.
    """
    server_manager = ServerManager()

    try:
        # Start server with torchic state
        assert server_manager.start_server("tests/states/torchic.state"), "Failed to start server"

        # Get milestones
        response = requests.get("http://localhost:8000/milestones", timeout=10)
        assert response.status_code == 200, f"Failed to get milestones: {response.status_code}"

        milestones_data = response.json()

        # Test milestones structure
        assert "milestones" in milestones_data, "Milestones data missing milestones list"
        assert "completed" in milestones_data, "Milestones data missing completed count"
        assert "total" in milestones_data, "Milestones data missing total count"
        assert "progress" in milestones_data, "Milestones data missing progress"
        assert "current_location" in milestones_data, "Milestones data missing current_location"

        milestones = milestones_data["milestones"]
        completed = milestones_data["completed"]
        total = milestones_data["total"]
        progress = milestones_data["progress"]
        current_location = milestones_data["current_location"]

        print(f"📊 Milestones progress: {completed}/{total} ({progress:.1%})")
        print(f"📍 Current location: {current_location}")

        # Test that Littleroot Town milestone exists (but may not be completed since we're in Route 101)
        littleroot_milestone = next(
            (m for m in milestones if "LITTLEROOT" in m["name"].upper()),
            None,
        )
        assert littleroot_milestone is not None, "Littleroot Town milestone not found"
        print(f"🏘️ Littleroot milestone: {littleroot_milestone}")

        # Test that current location is Route 101
        assert "ROUTE 101" in current_location.upper(), f"Current location should be Route 101, but found: {current_location}"

        # Test that basic milestones are completed
        basic_milestones = ["GAME_RUNNING", "HAS_PARTY", "STARTER_CHOSEN", "TORCHIC_OBTAINED", "ROUTE_101_VISITED"]
        for milestone_name in basic_milestones:
            milestone = next((m for m in milestones if m["name"] == milestone_name), None)
            assert milestone is not None, f"Basic milestone {milestone_name} not found"
            # Truthiness check instead of comparing to True with ==.
            assert milestone["completed"], f"Basic milestone {milestone_name} should be completed"
            print(f"✅ {milestone_name}: Completed")

        # Test that some milestones are not yet completed (game just started)
        incomplete_milestones = ["STONE_BADGE", "POKEDEX_RECEIVED", "FIRST_WILD_ENCOUNTER", "LITTLEROOT_TOWN"]
        for milestone_name in incomplete_milestones:
            milestone = next((m for m in milestones if m["name"] == milestone_name), None)
            assert milestone is not None, f"Milestone {milestone_name} not found"
            # Truthiness check instead of comparing to False with ==.
            assert not milestone["completed"], f"Milestone {milestone_name} should not be completed yet"
            print(f"⏳ {milestone_name}: Not completed yet")

    finally:
        server_manager.stop_server()
201
+
202
def test_torchic_state_summary():
    """Test that the torchic state provides a comprehensive summary.

    Starts a server from ``tests/states/torchic.state``, fetches ``/state``
    and checks the ``game`` section (money, battle flag, badges) and the
    ``visual`` section (resolution and a non-empty screenshot). The server
    is always stopped afterwards.
    """
    server_manager = ServerManager()

    try:
        # Start server with torchic state
        assert server_manager.start_server("tests/states/torchic.state"), "Failed to start server"

        # Get comprehensive state
        response = requests.get("http://localhost:8000/state", timeout=10)
        assert response.status_code == 200, f"Failed to get state: {response.status_code}"

        state_data = response.json()

        # Test game state
        game = state_data["game"]
        assert "money" in game, "Game data missing money"
        assert "game_state" in game, "Game data missing game_state"
        assert "is_in_battle" in game, "Game data missing is_in_battle"
        assert "badges" in game, "Game data missing badges"
        assert "dialog_text" in game, "Game data missing dialog_text"

        # Test that player has some money (starter money)
        money = game["money"]
        print(f"💰 Player money: {money}")
        assert money >= 0, "Player should have non-negative money"

        # Test that player is not in battle
        is_in_battle = game["is_in_battle"]
        print(f"⚔️ In battle: {is_in_battle}")
        # Truthiness check instead of comparing to False with ==.
        assert not is_in_battle, "Player should not be in battle at start"

        # Test that player has no badges yet
        badges = game["badges"]
        print(f"🏆 Badges: {badges}")
        assert len(badges) == 0, "Player should have no badges at start"

        # Test visual data
        visual = state_data["visual"]
        assert "screenshot_base64" in visual, "Visual data missing screenshot"
        assert "resolution" in visual, "Visual data missing resolution"

        resolution = visual["resolution"]
        print(f"📺 Resolution: {resolution}")
        assert resolution == [240, 160], f"Expected resolution [240, 160], got {resolution}"

        # Test that screenshot is present
        screenshot = visual["screenshot_base64"]
        assert len(screenshot) > 0, "Screenshot should not be empty"
        print(f"📸 Screenshot size: {len(screenshot)} characters")

        print("✅ Torchic state test completed successfully")

    finally:
        server_manager.stop_server()
257
+
258
if __name__ == "__main__":
    # Exit with pytest's status so a failing manual run is visible to the
    # calling shell instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))