synth-ai 0.1.9__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +37 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.1.9.dist-info/METADATA +0 -37
- synth_ai-0.1.9.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.9.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,558 @@
|
|
1
|
+
"""
|
2
|
+
Pallet Town Early Game Reward Components
|
3
|
+
|
4
|
+
Rewards specifically designed for the first 50 steps of Pokemon Red,
|
5
|
+
focusing on house exploration, town discovery, and story triggers.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
9
|
+
from typing import Dict, Any, Set
|
10
|
+
|
11
|
+
|
12
|
+
class LeaveStartingRoomReward(RewardComponent):
    """One-time +15 reward for a map change during which the player's y drops.

    The heuristic is meant to capture "going downstairs from the bedroom".
    Previous values come from ``action`` (``prev_map_id`` and
    ``prev_player_y``, both defaulting to -1); current values come from
    ``state`` (``map_id`` and ``player_y``).
    """

    def __init__(self):
        # Latched after the first payout so the reward is granted only once.
        self.triggered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time the transition is observed, else 0.0."""
        if not self.triggered:
            old_map = action.get("prev_map_id", -1)
            new_map = state["map_id"]
            old_y = action.get("prev_player_y", -1)
            new_y = state["player_y"]

            # Map changed and the y coordinate decreased in the same step.
            map_changed = old_map != new_map
            if map_changed and old_y > new_y:
                self.triggered = True
                return 15.0
        return 0.0
|
32
|
+
|
33
|
+
|
34
|
+
class TalkToMomReward(RewardComponent):
    """One-time +10 reward for the first text box opened on map 1 or 2.

    NOTE(review): maps 1 and 2 are assumed to be the player's house, and any
    newly opened text box there is counted as talking to mom - confirm both
    against the engine's ``map_id`` values.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.mom_talked_to = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 when a text box newly opens in the house, else 0.0."""
        if self.mom_talked_to:
            return 0.0

        in_house = state["map_id"] in (1, 2)
        if not (in_house and state["text_box_active"]):
            return 0.0

        # Only the step on which the text box opens counts (rising edge).
        if action.get("prev_text_box_active", False):
            return 0.0

        self.mom_talked_to = True
        return 10.0
|
51
|
+
|
52
|
+
|
53
|
+
class InteractWithTVReward(RewardComponent):
    """One-time +5 reward for opening a text box while standing at a TV tile.

    NOTE(review): the house maps (1, 2) and the TV tiles (3..5, 4) are
    heuristics carried over from the original code - verify against the game.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.tv_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time a text box opens at a TV tile, else 0.0."""
        if self.tv_checked:
            return 0.0

        if not (state["map_id"] in (1, 2) and state["text_box_active"]):
            return 0.0

        # Require the text box to have just opened on this step.
        if action.get("prev_text_box_active", False):
            return 0.0

        tv_tiles = ((3, 4), (4, 4), (5, 4))  # Common TV positions
        here = (state["player_x"], state["player_y"])
        if here in tv_tiles:
            self.tv_checked = True
            return 5.0
        return 0.0
|
77
|
+
|
78
|
+
|
79
|
+
class CheckComputerReward(RewardComponent):
    """One-time +5 reward for opening a text box near the bedroom PC.

    NOTE(review): map 1 is assumed to be the bedroom and the PC is assumed to
    sit in the region ``x >= 6, y <= 3`` - verify against the game.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.pc_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 the first time a text box opens in the PC corner, else 0.0."""
        if self.pc_checked:
            return 0.0

        in_bedroom_dialogue = state["map_id"] == 1 and state["text_box_active"]
        if not in_bedroom_dialogue:
            return 0.0

        # Only the step on which the text box opens counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        col, row = state["player_x"], state["player_y"]
        if col >= 6 and row <= 3:
            self.pc_checked = True
            return 5.0
        return 0.0
|
99
|
+
|
100
|
+
|
101
|
+
class HouseFullyExploredReward(RewardComponent):
    """One-time +20 bonus once every required house interaction was observed.

    An interaction is recorded when a text box newly opens on map 1 or 2 and
    the player's position matches one of the position rules in ``score``.

    NOTE(review): the house map ids and tile positions are heuristics carried
    over from the original code - verify against the game.
    """

    def __init__(self):
        # Labels of interactions seen so far.
        self.interactions: Set[str] = set()
        # Only labels that score() can actually record are required. The set
        # previously also listed "bookshelf" and "poster", which have no
        # position rule, so the completion check could never pass and the
        # bonus was unreachable. Re-add them together with their rules.
        self.required_interactions = {"tv", "pc", "mom"}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record a new interaction and return 20.0 when the set completes.

        Returns 0.0 on every other step, including all steps after the bonus
        has been granted.
        """
        # Subset test (not a length comparison) so that only the required
        # labels themselves can complete the set.
        if self.required_interactions <= self.interactions:
            return 0.0

        if not (state["map_id"] in [1, 2] and state["text_box_active"]):
            return 0.0

        # Count only the step on which the text box opens.
        if action.get("prev_text_box_active", False):
            return 0.0

        player_x, player_y = state["player_x"], state["player_y"]

        # Map positions to interaction types
        if (player_x, player_y) in [(3, 4), (4, 4), (5, 4)]:
            self.interactions.add("tv")
        elif player_x >= 6 and player_y <= 3:
            self.interactions.add("pc")
        elif (player_x, player_y) in [(1, 4), (2, 4)]:
            self.interactions.add("mom")
        # Add more position mappings as needed

        if self.required_interactions <= self.interactions:
            return 20.0
        return 0.0
|
130
|
+
|
131
|
+
|
132
|
+
class ExitHouseReward(RewardComponent):
    """One-time +20 reward for a transition from map 1 or 2 to map 0.

    NOTE(review): maps 1/2 are assumed to be the starting house and map 0
    Pallet Town - verify against the engine's ``map_id`` values.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.house_exited = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 on the first house-to-town transition, else 0.0."""
        if self.house_exited:
            return 0.0

        came_from = action.get("prev_map_id", -1)
        now_on = state["map_id"]

        left_house = came_from in (1, 2) and now_on == 0  # House to Pallet Town
        if not left_house:
            return 0.0

        self.house_exited = True
        return 20.0
|
150
|
+
|
151
|
+
|
152
|
+
class ExploreTownReward(RewardComponent):
    """+5 reward for each distinct map entered from map 0.

    Maps 1 and 2 never count, and each destination map is rewarded at most
    once per instance.
    """

    def __init__(self):
        # Map ids that have already been rewarded.
        self.buildings_entered: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a not-yet-seen map is entered from map 0, else 0.0."""
        came_from = action.get("prev_map_id", -1)
        here = state["map_id"]

        # From town (map 0) into any positive map id other than 1 and 2.
        entered_building = came_from == 0 and here > 0 and here not in (1, 2)
        if entered_building and here not in self.buildings_entered:
            self.buildings_entered.add(here)
            return 5.0
        return 0.0
|
170
|
+
|
171
|
+
|
172
|
+
class TalkToNPCsReward(RewardComponent):
    """+8 reward for each text box newly opened at a distinct spot on map 0.

    The player's ``(x, y, map_id)`` at the moment the text box opens stands in
    for the NPC's identity, so each spot is rewarded at most once.
    """

    def __init__(self):
        # (player_x, player_y, map_id) keys that have already been rewarded.
        self.npcs_talked_to: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 8.0 for a conversation started at an unseen spot, else 0.0."""
        if not (state["map_id"] == 0 and state["text_box_active"]):  # Pallet Town
            return 0.0

        # Only the step on which the text box opens counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.npcs_talked_to:
            return 0.0

        self.npcs_talked_to.add(spot)
        return 8.0
|
189
|
+
|
190
|
+
|
191
|
+
class OakLabDiscoveryReward(RewardComponent):
    """One-time +25 reward for a transition from map 0 to map 3.

    NOTE(review): map 3 is assumed to be Oak's lab and map 0 Pallet Town -
    verify against the engine's ``map_id`` values.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.lab_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 on the first town-to-lab transition, else 0.0."""
        if not self.lab_discovered:
            came_from = action.get("prev_map_id", -1)
            now_on = state["map_id"]

            # Entering Oak's lab (map 3) from town (map 0).
            if came_from == 0 and now_on == 3:
                self.lab_discovered = True
                return 25.0
        return 0.0
|
209
|
+
|
210
|
+
|
211
|
+
class AttemptRoute1Reward(RewardComponent):
    """One-time +30 reward for reaching ``player_y <= 1`` on map 0.

    Per the original comments this is meant to capture the player trying to
    leave Pallet Town northwards, which should trigger the Oak encounter.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.route_attempted = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 the first time the northern edge is reached, else 0.0."""
        if self.route_attempted:
            return 0.0

        # In Pallet Town (map 0) and at its northern edge.
        at_north_edge = state["map_id"] == 0 and state["player_y"] <= 1
        if not at_north_edge:
            return 0.0

        self.route_attempted = True
        return 30.0
|
227
|
+
|
228
|
+
|
229
|
+
class OakEncounterReward(RewardComponent):
    """One-time +50 reward for a text box opening near the top of map 0.

    A text box newly opening while on map 0 with ``player_y <= 2`` is treated
    as Professor Oak stopping the player; the dialogue itself is not inspected.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.oak_encountered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 50.0 the first time the encounter heuristic fires, else 0.0."""
        if self.oak_encountered:
            return 0.0

        # Text box is active now but was not active on the previous step.
        dialogue_started = state["text_box_active"] and not action.get(
            "prev_text_box_active", False
        )
        # Position where Oak would appear according to the original heuristic.
        if dialogue_started and state["map_id"] == 0 and state["player_y"] <= 2:
            self.oak_encountered = True
            return 50.0
        return 0.0
|
246
|
+
|
247
|
+
|
248
|
+
class FollowOakToLabReward(RewardComponent):
    """One-time +40 reward for entering map 3 from map 0 after the Oak encounter.

    The component tracks the encounter itself: it is considered to have
    happened once a text box is active on map 0 with ``player_y <= 2``.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.followed_oak = False
        # Set once the encounter heuristic has fired; gates the reward.
        self.oak_encounter_happened = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 40.0 on the first town-to-lab transition after the encounter."""
        if self.followed_oak:
            return 0.0

        # Phase 1: remember that the encounter took place.
        if (
            not self.oak_encounter_happened
            and state["map_id"] == 0
            and state["player_y"] <= 2
            and state["text_box_active"]
        ):
            self.oak_encounter_happened = True

        if not self.oak_encounter_happened:
            return 0.0

        # Phase 2: reward the walk from town (map 0) into the lab (map 3).
        came_from = action.get("prev_map_id", -1)
        now_on = state["map_id"]
        if came_from == 0 and now_on == 3:
            self.followed_oak = True
            return 40.0
        return 0.0
|
272
|
+
|
273
|
+
|
274
|
+
class ChooseStarterPokemonReward(RewardComponent):
    """One-time +100 reward when the party grows from 0 to 1 on map 3.

    The previous party is read from ``action["prev_party"]`` and the current
    one from ``state["party"]``; both default to an empty list when missing.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.starter_chosen = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 when the first Pokemon is obtained in the lab, else 0.0."""
        if self.starter_chosen:
            return 0.0

        size_before = len(action.get("prev_party", []))
        size_now = len(state.get("party", []))

        got_first_pokemon = size_before == 0 and size_now == 1
        if got_first_pokemon and state["map_id"] == 3:  # In Oak's lab
            self.starter_chosen = True
            return 100.0
        return 0.0
|
293
|
+
|
294
|
+
|
295
|
+
class RivalEncounterReward(RewardComponent):
    """One-time +30 reward for a text box opening on map 3 with a non-empty party.

    This is a simplified stand-in for meeting the rival: the dialogue content
    is not analysed, only where and when the text box opens.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.rival_met = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 30.0 the first time the rival heuristic fires, else 0.0."""
        if self.rival_met:
            return 0.0

        in_lab_dialogue = state["map_id"] == 3 and state["text_box_active"]
        if not in_lab_dialogue:
            return 0.0

        # Only the step on which the text box opens counts.
        if action.get("prev_text_box_active", False):
            return 0.0

        # Requires at least one Pokemon, i.e. the starter was already chosen.
        if len(state.get("party", [])) >= 1:
            self.rival_met = True
            return 30.0
        return 0.0
|
315
|
+
|
316
|
+
|
317
|
+
class FirstPokemonBattleReward(RewardComponent):
    """One-time +75 reward for the first battle that starts on map 3.

    A battle start is the step where ``state["in_battle"]`` is truthy while
    ``action["prev_in_battle"]`` (default False) was not.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.first_battle_done = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 75.0 when a battle first begins in the lab, else 0.0."""
        if self.first_battle_done:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        now_fighting = state["in_battle"]

        battle_started = now_fighting and not was_fighting
        if battle_started and state["map_id"] == 3:  # In Oak's lab
            self.first_battle_done = True
            return 75.0
        return 0.0
|
336
|
+
|
337
|
+
|
338
|
+
class MenuDiscoveryReward(RewardComponent):
    """One-time +10 reward the first time "START" is among the pressed buttons.

    Simplified implementation: no menu state is tracked, only whether
    ``action["buttons_pressed"]`` contains ``"START"``.
    """

    def __init__(self):
        # Set once the reward has been paid out.
        self.menu_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 on the first START press, else 0.0."""
        if self.menu_discovered:
            return 0.0

        pressed = action.get("buttons_pressed", [])
        if "START" not in pressed:
            return 0.0

        self.menu_discovered = True
        return 10.0
|
355
|
+
|
356
|
+
|
357
|
+
class PokemonMenuReward(RewardComponent):
    """One-time +15 bonus meant for checking the Pokemon party menu.

    Placeholder logic: no menu navigation is tracked, so the bonus is paid
    as soon as the party is non-empty.
    """

    def __init__(self):
        # True once the bonus has been handed out.
        self.pokemon_menu_checked = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 the first time ``state["party"]`` is non-empty.

        Args:
            state: Current game state; the optional ``party`` entry is read
                and defaults to an empty list.
            action: Step information (not consulted by this component).
        """
        if self.pokemon_menu_checked:
            return 0.0

        # Simplified - owning a Pokemon is taken as evidence that the menu
        # was checked; real menu state detection is not implemented.
        party_size = len(state.get("party", []))
        if party_size <= 0:
            return 0.0

        self.pokemon_menu_checked = True
        return 15.0
|
374
|
+
|
375
|
+
|
376
|
+
class BagDiscoveryReward(RewardComponent):
    """One-time +10 bonus meant for opening the bag/items menu.

    Simplified logic: the bonus is paid the first time the inventory is
    non-empty; the menu itself is not observed.
    """

    def __init__(self):
        # True once the bonus has been handed out.
        self.bag_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time ``state["inventory"]`` is non-empty.

        Args:
            state: Current game state; the optional ``inventory`` entry is
                read and defaults to an empty list.
            action: Step information (not consulted by this component).
        """
        if self.bag_discovered:
            return 0.0

        item_count = len(state.get("inventory", []))
        if item_count <= 0:
            return 0.0

        self.bag_discovered = True
        return 10.0
|
391
|
+
|
392
|
+
|
393
|
+
class SaveGameReward(RewardComponent):
    """One-time +20 bonus for the first time the game is saved.

    Placeholder logic: relies on the state exposing a ``game_saved`` flag.
    """

    def __init__(self):
        # True once the bonus has been handed out.
        self.game_saved = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 the first time ``state["game_saved"]`` is truthy.

        Args:
            state: Current game state; the optional ``game_saved`` flag is
                read and treated as False when absent.
            action: Step information (not consulted by this component).
        """
        if self.game_saved:
            return 0.0

        # Real save detection is not implemented; trust the state flag.
        if not state.get("game_saved", False):
            return 0.0

        self.game_saved = True
        return 20.0
|
409
|
+
|
410
|
+
|
411
|
+
class TryAllDirectionsReward(RewardComponent):
    """One-time +5 bonus once UP, DOWN, LEFT and RIGHT have all been pressed."""

    def __init__(self):
        # Direction buttons seen so far, accumulated across steps.
        self.directions_tried: Set[str] = set()
        # True once the bonus has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record pressed directions and pay 5.0 when all four were seen.

        Args:
            state: Current game state (not consulted by this component).
            action: Step information; the optional ``buttons_pressed``
                collection is read and defaults to an empty list.
        """
        if self.reward_given:
            return 0.0

        pressed = action.get("buttons_pressed", [])
        # Keep only the directional buttons; anything else is ignored.
        self.directions_tried.update(
            name for name in pressed if name in ("UP", "DOWN", "LEFT", "RIGHT")
        )

        if len(self.directions_tried) < 4:
            return 0.0

        self.reward_given = True
        return 5.0
|
432
|
+
|
433
|
+
|
434
|
+
class DoorInteractionReward(RewardComponent):
    """+3 per distinct door, detected as a map change away from map 0.

    Map 0 is treated as the town; a door is identified by the player's
    position and the map that was entered.
    """

    def __init__(self):
        # (player_x, player_y, map_id) triples already rewarded.
        self.doors_tried: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 for a not-yet-seen transition from map 0 to another map.

        Args:
            state: Current game state; ``map_id``, ``player_x`` and
                ``player_y`` are read.
            action: Step information; the optional ``prev_map_id`` is read
                and defaults to -1.
        """
        previous_map = action.get("prev_map_id", -1)
        present_map = state["map_id"]

        # From town to building: the map changed and we came from map 0.
        entered_building = previous_map != present_map and previous_map == 0
        if not entered_building:
            return 0.0

        door = (state["player_x"], state["player_y"], present_map)
        if door in self.doors_tried:
            return 0.0

        self.doors_tried.add(door)
        return 3.0
|
451
|
+
|
452
|
+
|
453
|
+
class ObjectInteractionReward(RewardComponent):
    """+3 per distinct spot where pressing A opens a text box."""

    def __init__(self):
        # (player_x, player_y, map_id) triples already rewarded.
        self.objects_interacted: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 when A newly opens a text box at an unseen position.

        Args:
            state: Current game state; ``text_box_active``, ``player_x``,
                ``player_y`` and ``map_id`` are read.
            action: Step information; the optional ``buttons_pressed`` and
                ``prev_text_box_active`` entries are read.
        """
        pressed = action.get("buttons_pressed", [])
        if "A" not in pressed or not state["text_box_active"]:
            return 0.0

        # The text box must have just appeared, not merely still be open.
        if action.get("prev_text_box_active", False):
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.objects_interacted:
            return 0.0

        self.objects_interacted.add(spot)
        return 3.0
|
470
|
+
|
471
|
+
|
472
|
+
class SignReadingReward(RewardComponent):
    """+5 per sign read on map 0, using a hard-coded list of sign positions.

    Map 0 is treated as Pallet Town.
    """

    def __init__(self):
        # (player_x, player_y) pairs of signs already rewarded.
        self.signs_read: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a text box opens at an unread sign position.

        Args:
            state: Current game state; ``map_id``, ``text_box_active``,
                ``player_x`` and ``player_y`` are read.
            action: Step information; the optional ``prev_text_box_active``
                flag is read and treated as False when absent.
        """
        in_town_with_text = state["map_id"] == 0 and state["text_box_active"]
        if not in_town_with_text:
            return 0.0

        # Only count the step on which the text box first appears.
        if action.get("prev_text_box_active", False):
            return 0.0

        # Common sign positions; the town sign is usually at one of these.
        known_signs = ((5, 8), (6, 8), (7, 8))
        here = (state["player_x"], state["player_y"])
        if here not in known_signs:
            return 0.0

        if here in self.signs_read:
            return 0.0

        self.signs_read.add(here)
        return 5.0
|
492
|
+
|
493
|
+
|
494
|
+
class CompleteTownExplorationReward(RewardComponent):
    """One-time +50 bonus after enough distinct tiles on map 0 were visited.

    Map 0 is treated as Pallet Town. "Every accessible location" is
    approximated by a fixed tile count.
    """

    def __init__(self):
        # Distinct (player_x, player_y) positions seen while on map 0.
        self.locations_visited: Set[tuple] = set()
        # Estimated number of accessible tiles in Pallet Town.
        self.required_locations = 20
        # True once the bonus has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record the current tile and pay 50.0 once the threshold is met.

        Args:
            state: Current game state; ``map_id``, ``player_x`` and
                ``player_y`` are read.
            action: Step information (not consulted by this component).
        """
        if self.reward_given:
            return 0.0

        if state["map_id"] != 0:  # Not in Pallet Town
            return 0.0

        self.locations_visited.add((state["player_x"], state["player_y"]))

        if len(self.locations_visited) < self.required_locations:
            return 0.0

        self.reward_given = True
        return 50.0
|
514
|
+
|
515
|
+
|
516
|
+
class AllNPCsTalkedToReward(RewardComponent):
    """One-time +30 bonus after enough distinct conversations on map 0.

    A conversation is approximated by a text box newly opening; each
    distinct player position at which that happens counts as one NPC.
    """

    def __init__(self):
        # Distinct (player_x, player_y) positions where a text box opened.
        self.npcs_talked_to: Set[tuple] = set()
        # Estimated number of NPCs in Pallet Town.
        self.required_npcs = 5
        # True once the bonus has been handed out.
        self.reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Record a new conversation spot and pay 30.0 at the threshold.

        Args:
            state: Current game state; ``map_id``, ``text_box_active``,
                ``player_x`` and ``player_y`` are read.
            action: Step information; the optional ``prev_text_box_active``
                flag is read and treated as False when absent.
        """
        if self.reward_given:
            return 0.0

        talking_in_town = state["map_id"] == 0 and state["text_box_active"]
        if not talking_in_town:
            return 0.0

        # Ignore steps where the text box was already open.
        if action.get("prev_text_box_active", False):
            return 0.0

        self.npcs_talked_to.add((state["player_x"], state["player_y"]))

        if len(self.npcs_talked_to) < self.required_npcs:
            return 0.0

        self.reward_given = True
        return 30.0
|
539
|
+
|
540
|
+
|
541
|
+
class ReadyForAdventureReward(RewardComponent):
    """One-time +60 bonus for reaching the town exit with a Pokemon.

    The exit is approximated as ``player_y <= 2`` on map 0.
    """

    def __init__(self):
        # True once the bonus has been handed out.
        self.ready_reward_given = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 60.0 the first time the party is non-empty at the exit.

        Args:
            state: Current game state; the optional ``party`` entry plus
                ``map_id`` and ``player_y`` are read.
            action: Step information (not consulted by this component).
        """
        if self.ready_reward_given:
            return 0.0

        owns_starter = len(state.get("party", [])) > 0
        near_exit = state["map_id"] == 0 and state["player_y"] <= 2

        if not (owns_starter and near_exit):
            return 0.0

        self.ready_reward_given = True
        return 60.0
|