synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
# Pokemon Red memory addresses for state extraction.
#
# NOTE(review): the per-field size/layout remarks below are carried over from
# the original comments and are not verifiable from this file alone -- confirm
# against a RAM map before relying on any stride between party entries.

# --- Overworld / battle state ---
# Bit-field for badges.
BADGE_FLAGS = 0xD356
# Current map ID.
MAP_ID = 0xD35E
# Player X coordinate.
PLAYER_X = 0xD362
# Player Y coordinate.
PLAYER_Y = 0xD361
# Battle state flag.
IN_BATTLE_FLAG = 0xD057
# Battle outcome: 0=ongoing, 1=win, 2=lose.
BATTLE_OUTCOME = 0xD089

# --- Party Pokemon data (up to 6 Pokemon) ---
# Number of Pokemon in party (0-6).
PARTY_COUNT = 0xD163
# Species of each Pokemon (6 bytes).
PARTY_SPECIES = 0xD164
# Current HP of each Pokemon (2 bytes each, 12 bytes total).
PARTY_HP_CURRENT = 0xD16C
# Max HP of each Pokemon (2 bytes each, 12 bytes total).
PARTY_HP_MAX = 0xD188
# Level of each Pokemon (6 bytes).
PARTY_LEVELS = 0xD18C
# XP of each Pokemon (3 bytes each, 18 bytes total).
PARTY_XP = 0xD179

# --- Player data ---
# Player money (3 bytes, BCD format).
MONEY = 0xD347
# Player name (up to 11 bytes).
PLAYER_NAME = 0xD158

# --- Inventory data ---
# Number of items in bag.
INVENTORY_COUNT = 0xD31D
# Start of item/quantity pairs (up to 20 items, 2 bytes each).
INVENTORY_START = 0xD31E

# --- Game state flags ---
# Menu state flags.
MENU_STATE = 0xCC26
# Warp/transition flags.
WARP_FLAG = 0xD36C
# Text box display flag.
TEXT_BOX_ACTIVE = 0xCD3D
|
@@ -0,0 +1,275 @@
|
|
1
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
2
|
+
from typing import Dict, Any, Set
|
3
|
+
|
4
|
+
|
5
|
+
class BadgeRewardComponent(RewardComponent):
    """Reward for earning gym badges."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 1.0 for every badge bit that is newly set.

        Args:
            state: Must contain ``"badges"`` as an integer bit-field.
            action: May contain ``"prev_badges"`` (defaults to 0).

        Returns:
            The number of bits set in ``state["badges"]`` that were not set
            in ``prev_badges``, as a float.
        """
        earned_before = action.get("prev_badges", 0)
        # Mask out every bit that was already set before this step.
        freshly_set = state["badges"] & ~earned_before
        return bin(freshly_set).count("1") * 1.0
|
14
|
+
|
15
|
+
|
16
|
+
class MapTransitionComponent(RewardComponent):
    """Reward for exploring new areas."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.1 when the map ID differs from the previous one.

        Args:
            state: Must contain ``"map_id"``.
            action: May contain ``"prev_map_id"`` (defaults to -1).

        Returns:
            0.1 if ``state["map_id"]`` is not equal to ``prev_map_id``,
            otherwise 0.0.
        """
        came_from = action.get("prev_map_id", -1)
        if state["map_id"] != came_from:
            return 0.1
        return 0.0
|
23
|
+
|
24
|
+
|
25
|
+
class BattleVictoryComponent(RewardComponent):
    """Reward for winning battles."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.5 on the step where a battle ends with outcome 1.

        Args:
            state: Must contain ``"in_battle"`` and ``"battle_outcome"``.
            action: May contain ``"prev_in_battle"`` (defaults to False).

        Returns:
            0.5 if the previous step was in battle, the current one is not,
            and ``battle_outcome == 1``; otherwise 0.0.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state["battle_outcome"]

        # Only the battle -> overworld transition with a win is rewarded.
        just_won = was_fighting and not still_fighting and outcome == 1
        return 0.5 if just_won else 0.0
|
37
|
+
|
38
|
+
|
39
|
+
class LevelUpComponent(RewardComponent):
    """Reward for Pokemon leveling up."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.3 per level gained since the previous step.

        Args:
            state: Must contain ``"party_level"``.
            action: May contain ``"prev_party_level"`` (defaults to 0).

        Returns:
            ``0.3 * gain`` where ``gain`` is the level difference, clamped
            so that a decrease yields no reward.
        """
        baseline = action.get("prev_party_level", 0)
        delta = state["party_level"] - baseline
        gained = delta if delta > 0 else 0
        return gained * 0.3
|
47
|
+
|
48
|
+
|
49
|
+
class XPGainComponent(RewardComponent):
    """Small reward for XP gains."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.001 per XP point gained since the previous step.

        Args:
            state: Must contain ``"party_xp"``.
            action: May contain ``"prev_party_xp"`` (defaults to 0).

        Returns:
            ``0.001 * gain`` where ``gain`` is the XP difference, clamped so
            that a decrease yields no reward.
        """
        baseline = action.get("prev_party_xp", 0)
        delta = state["party_xp"] - baseline
        gained = delta if delta > 0 else 0
        # Very small multiplier.
        return gained * 0.001
|
57
|
+
|
58
|
+
|
59
|
+
class StepPenaltyComponent(RewardComponent):
    """Small penalty for each step to encourage efficiency."""

    def __init__(self, penalty: float = -0.001) -> None:
        """Store the constant value returned on every call to ``score``.

        Args:
            penalty: Value returned by ``score``; defaults to -0.001.
        """
        self.penalty = penalty

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the configured penalty; ``state`` and ``action`` are unused."""
        return self.penalty
|
67
|
+
|
68
|
+
|
69
|
+
class MenuPenaltyComponent(RewardComponent):
    """Penalty for excessive menu usage (currently a placeholder)."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Always return 0.0.

        No penalty is computed yet: doing so would need more sophisticated
        menu tracking than is available here.
        """
        return 0.0
|
75
|
+
|
76
|
+
|
77
|
+
# ===== NEW EARLY GAME PALLET TOWN REWARDS =====
|
78
|
+
|
79
|
+
|
80
|
+
class ExitHouseReward(RewardComponent):
    """High reward for first time leaving the starting house - +2.0 points."""

    def __init__(self) -> None:
        """Start with the reward not yet granted."""
        self.house_exited = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 2.0 once, on the first house -> town map transition.

        Args:
            state: Must contain ``"map_id"`` (read only while the reward is
                still pending).
            action: May contain ``"prev_map_id"`` (defaults to -1).

        Returns:
            2.0 the first time the previous map is 1 or 2 and the current
            map is 0; 0.0 on every other call.
        """
        if not self.house_exited:
            came_from = action.get("prev_map_id", -1)
            now_on = state["map_id"]

            # Assumes house maps are 1 and 2 and the town is map 0.
            if came_from in (1, 2) and now_on == 0:
                self.house_exited = True
                return 2.0
        return 0.0
|
98
|
+
|
99
|
+
|
100
|
+
class NPCInteractionReward(RewardComponent):
    """Reward for talking to NPCs - +0.8 points per unique NPC."""

    def __init__(self) -> None:
        """Start with no recorded conversations."""
        self.npcs_talked_to: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.8 when a text box opens at a not-yet-seen location.

        Args:
            state: Must contain ``"text_box_active"``; ``"player_x"``,
                ``"player_y"`` and ``"map_id"`` are read when a text box
                has just opened.
            action: May contain ``"prev_text_box_active"`` (defaults to
                False).

        Returns:
            0.8 the first time a text box opens at a given
            ``(player_x, player_y, map_id)``; otherwise 0.0.
        """
        box_just_opened = state["text_box_active"] and not action.get(
            "prev_text_box_active", False
        )
        if not box_just_opened:
            return 0.0

        # The player's position stands in as the NPC identifier.
        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.npcs_talked_to:
            return 0.0

        self.npcs_talked_to.add(spot)
        return 0.8
|
115
|
+
|
116
|
+
|
117
|
+
class OakLabDiscoveryReward(RewardComponent):
    """High reward for finding and entering Oak's lab - +2.5 points."""

    def __init__(self) -> None:
        """Start with the reward not yet granted."""
        self.lab_discovered = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 2.5 once, on the first map 0 -> map 3 transition.

        Args:
            state: Must contain ``"map_id"`` (read only while the reward is
                still pending).
            action: May contain ``"prev_map_id"`` (defaults to -1).

        Returns:
            2.5 the first time the previous map is 0 and the current map is
            3; 0.0 on every other call.
        """
        if not self.lab_discovered:
            came_from = action.get("prev_map_id", -1)
            now_on = state["map_id"]

            # Assumes Oak's lab is map 3 and is entered from map 0.
            if came_from == 0 and now_on == 3:
                self.lab_discovered = True
                return 2.5
        return 0.0
|
135
|
+
|
136
|
+
|
137
|
+
class StarterPokemonReward(RewardComponent):
    """Very high reward for getting first Pokemon - +10.0 points."""

    def __init__(self) -> None:
        """Start with the reward not yet granted."""
        self.starter_obtained = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 once, when the party grows from 0 to 1 on map 3.

        Args:
            state: May contain ``"party"`` (defaults to an empty list);
                ``"map_id"`` is read only when the party sizes match.
            action: May contain ``"prev_party"`` (defaults to an empty
                list).

        Returns:
            10.0 the first time the previous party is empty, the current
            party has exactly one member, and ``map_id == 3``; otherwise
            0.0.
        """
        if not self.starter_obtained:
            size_before = len(action.get("prev_party", []))
            size_now = len(state.get("party", []))

            # Map 3 is taken to be Oak's lab.
            if size_before == 0 and size_now == 1 and state["map_id"] == 3:
                self.starter_obtained = True
                return 10.0
        return 0.0
|
156
|
+
|
157
|
+
|
158
|
+
class FirstBattleReward(RewardComponent):
    """High reward for engaging in first battle - +5.0 points."""

    def __init__(self) -> None:
        """Start with the reward not yet granted."""
        self.first_battle = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 once, on the first transition into a battle.

        Args:
            state: Must contain ``"in_battle"`` (read only while the reward
                is still pending).
            action: May contain ``"prev_in_battle"`` (defaults to False).

        Returns:
            5.0 the first time the previous step was not in battle and the
            current one is; 0.0 on every other call.
        """
        if not self.first_battle:
            was_fighting = action.get("prev_in_battle", False)
            now_fighting = state["in_battle"]

            if not was_fighting and now_fighting:
                self.first_battle = True
                return 5.0
        return 0.0
|
175
|
+
|
176
|
+
|
177
|
+
class DirectionExplorationReward(RewardComponent):
    """Reward for trying all movement directions - +1.0 points when complete."""

    def __init__(self) -> None:
        """Start with no directions seen and the reward not yet granted."""
        self.directions_tried: Set[str] = set()
        self.reward_given = False

    @staticmethod
    def _heading(x_was: Any, y_was: Any, x_now: Any, y_now: Any) -> Any:
        """Name the direction implied by a position change, or None.

        The X axis is checked first, so a change on both axes is reported
        as a horizontal move only.
        """
        if x_now > x_was:
            return "RIGHT"
        if x_now < x_was:
            return "LEFT"
        if y_now > y_was:
            return "DOWN"
        if y_now < y_was:
            return "UP"
        return None

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 1.0 once, when all four directions have been observed.

        Directions are inferred from the difference between the previous and
        current player coordinates.

        Args:
            state: Must contain ``"player_x"`` and ``"player_y"`` (read only
                while the reward is still pending).
            action: May contain ``"prev_player_x"`` / ``"prev_player_y"``;
                each defaults to the matching current coordinate.

        Returns:
            1.0 on the call where the fourth distinct direction is recorded;
            0.0 on every other call.
        """
        if self.reward_given:
            return 0.0

        x_now = state["player_x"]
        x_was = action.get("prev_player_x", x_now)
        y_now = state["player_y"]
        y_was = action.get("prev_player_y", y_now)

        heading = self._heading(x_was, y_was, x_now, y_now)
        if heading is not None:
            self.directions_tried.add(heading)

        if len(self.directions_tried) < 4:
            return 0.0

        self.reward_given = True
        return 1.0
|
207
|
+
|
208
|
+
|
209
|
+
class BuildingExplorationReward(RewardComponent):
    """Reward for entering different buildings - +0.5 points per building."""

    def __init__(self) -> None:
        """Start with no buildings recorded."""
        self.buildings_entered: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.5 the first time each building map is entered from map 0.

        Args:
            state: Must contain ``"map_id"``.
            action: May contain ``"prev_map_id"`` (defaults to -1).

        Returns:
            0.5 when the previous map is 0, the current map is positive and
            not 1 or 2, and the current map has not been recorded before;
            otherwise 0.0.
        """
        came_from = action.get("prev_map_id", -1)
        now_on = state["map_id"]

        # Only a move from town (map 0) into a map other than 1 or 2 counts.
        entered_building = came_from == 0 and now_on > 0 and now_on not in (1, 2)
        if not entered_building:
            return 0.0

        if now_on in self.buildings_entered:
            return 0.0

        self.buildings_entered.add(now_on)
        return 0.5
|
227
|
+
|
228
|
+
|
229
|
+
class ObjectInteractionReward(RewardComponent):
    """Reward for pressing A on various objects - +0.3 points per object."""

    def __init__(self) -> None:
        """Start with no recorded interactions."""
        self.objects_interacted: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.3 when a text box opens at a not-yet-seen location.

        Args:
            state: Must contain ``"text_box_active"``; ``"player_x"``,
                ``"player_y"`` and ``"map_id"`` are read when a text box
                has just opened.
            action: May contain ``"prev_text_box_active"`` (defaults to
                False).

        Returns:
            0.3 the first time a text box opens at a given
            ``(player_x, player_y, map_id)``; otherwise 0.0.
        """
        # A text box that was not active on the previous step is treated as
        # the result of an interaction.
        box_just_opened = state["text_box_active"] and not action.get(
            "prev_text_box_active", False
        )
        if not box_just_opened:
            return 0.0

        spot = (state["player_x"], state["player_y"], state["map_id"])
        if spot in self.objects_interacted:
            return 0.0

        self.objects_interacted.add(spot)
        return 0.3
|
243
|
+
|
244
|
+
|
245
|
+
class TownExplorationReward(RewardComponent):
    """Reward for thorough town exploration - +0.1 per new position."""

    def __init__(self) -> None:
        """Start with no visited positions."""
        self.positions_visited: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.1 for each previously unseen position on map 0.

        Args:
            state: Must contain ``"map_id"``; ``"player_x"`` and
                ``"player_y"`` are read when ``map_id == 0``.
            action: Unused.

        Returns:
            0.1 the first time a given ``(player_x, player_y)`` is seen
            while on map 0; otherwise 0.0.
        """
        # Map 0 is taken to be Pallet Town.
        if state["map_id"] != 0:
            return 0.0

        spot = (state["player_x"], state["player_y"])
        if spot in self.positions_visited:
            return 0.0

        self.positions_visited.add(spot)
        return 0.1
|
258
|
+
|
259
|
+
|
260
|
+
class RouteAttemptReward(RewardComponent):
    """Reward for trying to leave town (triggers story) - +3.0 points."""

    def __init__(self) -> None:
        """Start with the reward not yet granted."""
        self.route_attempted = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 once, when the player reaches the top rows of map 0.

        Args:
            state: Must contain ``"map_id"``; ``"player_y"`` is read when
                ``map_id == 0``. Neither is read after the reward has been
                granted.
            action: Unused.

        Returns:
            3.0 the first time ``map_id == 0`` and ``player_y <= 1``; 0.0 on
            every other call.
        """
        if not self.route_attempted:
            # Map 0 is taken to be Pallet Town; y <= 1 is its northern edge.
            if state["map_id"] == 0 and state["player_y"] <= 1:
                self.route_attempted = True
                return 3.0
        return 0.0
|
@@ -0,0 +1,142 @@
|
|
1
|
+
"""
|
2
|
+
Pokemon Red Reward Library
|
3
|
+
|
4
|
+
Comprehensive collection of reward components organized by category.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from .pallet_town_rewards import *
|
8
|
+
from .exploration_rewards import *
|
9
|
+
from .social_rewards import *
|
10
|
+
from .pokemon_rewards import *
|
11
|
+
from .battle_rewards import *
|
12
|
+
from .story_rewards import *
|
13
|
+
from .economy_rewards import *
|
14
|
+
from .efficiency_rewards import *
|
15
|
+
from .novelty_rewards import *
|
16
|
+
from .adaptive_rewards import *
|
17
|
+
from .composite_rewards import *
|
18
|
+
|
19
|
+
# Names exported by a star-import of this package, grouped by reward category.
# NOTE(review): each entry is presumably defined in one of the submodules
# star-imported above - confirm, because a name listed here that the package
# does not actually provide makes `from <package> import *` raise AttributeError.
__all__ = [
    # Pallet Town Early Game
    "LeaveStartingRoomReward",
    "TalkToMomReward",
    "InteractWithTVReward",
    "CheckComputerReward",
    "HouseFullyExploredReward",
    "ExitHouseReward",
    "ExploreTownReward",
    "TalkToNPCsReward",
    "OakLabDiscoveryReward",
    "AttemptRoute1Reward",
    "OakEncounterReward",
    "FollowOakToLabReward",
    "ChooseStarterPokemonReward",
    "RivalEncounterReward",
    "FirstPokemonBattleReward",
    "MenuDiscoveryReward",
    "PokemonMenuReward",
    "BagDiscoveryReward",
    "SaveGameReward",
    "TryAllDirectionsReward",
    "DoorInteractionReward",
    "ObjectInteractionReward",
    "SignReadingReward",
    "CompleteTownExplorationReward",
    "AllNPCsTalkedToReward",
    "ReadyForAdventureReward",
    # Exploration
    "NewAreaDiscoveryReward",
    "AreaCompletionReward",
    "RouteCompletionReward",
    "BuildingEntryReward",
    "HiddenAreaDiscoveryReward",
    "HiddenItemFoundReward",
    "FirstItemOfTypeReward",
    "RareItemDiscoveryReward",
    "KeyItemAcquisitionReward",
    "FirstWarpUsageReward",
    "PCUsageReward",
    "VendingMachineReward",
    # Social & NPC
    "NewNPCConversationReward",
    "HelpfulInformationReceivedReward",
    "StoryDialogueProgressionReward",
    "ProfessorOakInteractionsReward",
    "NPCGiftReceivedReward",
    "TradeCompletionReward",
    "NameRaterUsageReward",
    # Pokemon Collection
    "FirstPokemonCaughtReward",
    "NewSpeciesCaughtReward",
    "RarePokemonCaughtReward",
    "EvolutionStonePokemonReward",
    "PokedexMilestonesReward",
    "AreaPokedexCompletionReward",
    "TypeCollectionReward",
    "PokemonEvolutionReward",
    "LevelMilestonesReward",
    "MoveLearningReward",
    "TMHMTeachingReward",
    # Battle & Combat
    "WildPokemonDefeatedReward",
    "TrainerBattleVictoryReward",
    "GymLeaderVictoryReward",
    "EliteFourMemberVictoryReward",
    "ChampionVictoryReward",
    "TypeAdvantageUsageReward",
    "CriticalHitReward",
    "StatusEffectUsageReward",
    "OHKOReward",
    "FlawlessVictoryReward",
    "UnderleveledVictoryReward",
    "BattleStreakReward",
    # Story & Achievement
    "GymBadgeEarnedReward",
    "HMAcquisitionReward",
    "EliteFourAccessReward",
    "HallOfFameEntryReward",
    "RivalBattleCompletionReward",
    "TeamRocketDefeatReward",
    "LegendaryEncounterReward",
    "SilphCoCompletionReward",
    "SafariZoneSuccessReward",
    "GameCornerPrizesReward",
    "FossilRevivalReward",
    # Economy & Resources
    "FirstEarningsReward",
    "WealthMilestonesReward",
    "SmartPurchasesReward",
    "RarePurchaseReward",
    "InventoryOrganizationReward",
    "HealingItemUsageReward",
    "PokeballEfficiencyReward",
    # Efficiency & Optimization
    "FastTravelUsageReward",
    "OptimalRoutingReward",
    "PuzzleSolvingReward",
    "MoveEffectivenessReward",
    "EvolutionTimingReward",
    "HMUsageReward",
    # Novelty & Exploration
    "FirstBattleReward",
    "FirstPokemonCenterVisitReward",
    "FirstPokemartPurchaseReward",
    "FirstSaveReward",
    "MenuExplorationReward",
    "ButtonDiscoveryReward",
    "FeatureDiscoveryReward",
    # Adaptive & Learning
    "MistakeRecoveryReward",
    "StrategyAdaptationReward",
    "ResourceConservationReward",
    "PatternRecognitionReward",
    "RouteOptimizationReward",
    "BattlePreparationReward",
    # Composite & Milestone
    "PerfectGymRunReward",
    "AreaMasteryReward",
    "SpeedrunMilestonesReward",
    "ExplorationStreakReward",
    "BattleWinStreakReward",
    "PerfectDayReward",
]
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""
|
2
|
+
Adaptive & Learning Reward Components
|
3
|
+
|
4
|
+
Rewards for improvement over time and meta-learning.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any
|
9
|
+
|
10
|
+
|
11
|
+
class MistakeRecoveryReward(RewardComponent):
    """Reward for fixing an earlier error (intended value: +10 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires mistake tracking and recovery detection.
        no_reward = 0.0
        return no_reward
|
17
|
+
|
18
|
+
|
19
|
+
class StrategyAdaptationReward(RewardComponent):
    """Reward for switching tactics to suit type matchups (intended: +15 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires strategy analysis.
        no_reward = 0.0
        return no_reward
|
25
|
+
|
26
|
+
|
27
|
+
class ResourceConservationReward(RewardComponent):
    """Reward for economical PP/item usage (intended value: +8 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires resource usage tracking.
        no_reward = 0.0
        return no_reward
|
33
|
+
|
34
|
+
|
35
|
+
class PatternRecognitionReward(RewardComponent):
    """Reward for spotting and adapting to trainer patterns (intended: +12 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires pattern analysis.
        no_reward = 0.0
        return no_reward
|
41
|
+
|
42
|
+
|
43
|
+
class RouteOptimizationReward(RewardComponent):
    """Reward for taking a better path on a repeat visit (intended: +20 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires route comparison.
        no_reward = 0.0
        return no_reward
|
49
|
+
|
50
|
+
|
51
|
+
class BattlePreparationReward(RewardComponent):
    """Reward for healing/preparing ahead of major battles (intended: +15 points).

    Not implemented yet: every call currently scores 0.0.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; always 0.0 for now."""
        # TODO: requires preparation detection.
        no_reward = 0.0
        return no_reward
|