synth-ai 0.2.0__py3-none-any.whl → 0.2.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +28 -2
- synth_ai/core/system.py +4 -0
- synth_ai/environments/__init__.py +35 -0
- synth_ai/environments/environment/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/__init__.py +1 -0
- synth_ai/environments/environment/artifacts/base.py +50 -0
- synth_ai/environments/environment/core.py +22 -0
- synth_ai/environments/environment/db/__init__.py +1 -0
- synth_ai/environments/environment/db/sqlite.py +45 -0
- synth_ai/environments/environment/registry.py +24 -0
- synth_ai/environments/environment/resources/sqlite.py +46 -0
- synth_ai/environments/environment/results.py +1 -0
- synth_ai/environments/environment/rewards/__init__.py +1 -0
- synth_ai/environments/environment/rewards/core.py +28 -0
- synth_ai/environments/environment/shared_engine.py +26 -0
- synth_ai/environments/environment/tools/__init__.py +34 -0
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +872 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +1110 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/engine.py +502 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/environment.py +255 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +228 -0
- synth_ai/environments/examples/enron/agent_demos/test_synth_react.py +535 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +111 -0
- synth_ai/environments/examples/enron/units/test_email_index.py +8 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +47 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +220 -0
- synth_ai/environments/examples/minigrid/agent_demos/test_minigrid_react_agent.py +393 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/minigrid/units/test_action_behavior.py +226 -0
- synth_ai/environments/examples/minigrid/units/test_debug_messages.py +83 -0
- synth_ai/environments/examples/minigrid/units/test_exploration.py +120 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_engine.py +214 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment.py +238 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_environment_mapping.py +301 -0
- synth_ai/environments/examples/minigrid/units/test_minigrid_taskset.py +210 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +832 -0
- synth_ai/environments/examples/nethack/agent_demos/test_nethack_react_agent.py +1112 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/nethack/units/test_nethack_engine.py +277 -0
- synth_ai/environments/examples/nethack/units/test_nethack_environment.py +281 -0
- synth_ai/environments/examples/nethack/units/test_nethack_taskset.py +213 -0
- synth_ai/environments/examples/nethack/units/test_recording.py +307 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/agent_demos/test_synth_react.py +1471 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/red/test_fixes.py +125 -0
- synth_ai/environments/examples/red/test_fixes_mock.py +148 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/red/units/test_basic_functionality.py +97 -0
- synth_ai/environments/examples/red/units/test_button_press_requirements.py +217 -0
- synth_ai/environments/examples/red/units/test_engine.py +192 -0
- synth_ai/environments/examples/red/units/test_environment.py +455 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +227 -0
- synth_ai/environments/examples/red/units/test_integration.py +217 -0
- synth_ai/environments/examples/red/units/test_memory_extraction.py +111 -0
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +1100 -0
- synth_ai/environments/examples/red/units/test_movement_debug.py +255 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_debug.py +163 -0
- synth_ai/environments/examples/red/units/test_pokemon_mcts_verbose.py +117 -0
- synth_ai/environments/examples/red/units/test_red_basic.py +145 -0
- synth_ai/environments/examples/red/units/test_red_comprehensive.py +323 -0
- synth_ai/environments/examples/red/units/test_retry_movement.py +195 -0
- synth_ai/environments/examples/red/units/test_reward_components.py +186 -0
- synth_ai/environments/examples/red/units/test_rom_integration.py +260 -0
- synth_ai/environments/examples/red/units/test_taskset.py +116 -0
- synth_ai/environments/examples/red/units/test_tree.py +448 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +900 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_dspy_react.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_sokoban_react_agent.py +498 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_lats.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_locally.py +748 -0
- synth_ai/environments/examples/sokoban/agent_demos/test_synth_react_service.py +296 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +94 -0
- synth_ai/environments/examples/sokoban/units/test_building_task_set.py +49 -0
- synth_ai/environments/examples/sokoban/units/test_false_positive.py +120 -0
- synth_ai/environments/examples/sokoban/units/test_simple_run_through_environment.py +119 -0
- synth_ai/environments/examples/sokoban/units/test_sokoban_environment.py +98 -0
- synth_ai/environments/examples/sokoban/units/test_tree.py +364 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_synth_react.py +266 -0
- synth_ai/environments/examples/tictactoe/agent_demos/test_tictactoe_react_agent.py +470 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_engine.py +393 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_environment.py +493 -0
- synth_ai/environments/examples/tictactoe/units/test_tictactoe_taskset.py +191 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/agent_demos/test_synth_react.py +520 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/environments/examples/verilog/units/test_verilog_engine.py +466 -0
- synth_ai/environments/examples/verilog/units/test_verilog_environment.py +585 -0
- synth_ai/environments/examples/verilog/units/test_verilog_integration.py +383 -0
- synth_ai/environments/examples/verilog/units/test_verilog_taskset.py +457 -0
- synth_ai/environments/reproducibility/core.py +42 -0
- synth_ai/environments/reproducibility/tree.py +364 -0
- synth_ai/environments/service/app.py +78 -0
- synth_ai/environments/service/core_routes.py +775 -0
- synth_ai/environments/service/external_registry.py +57 -0
- synth_ai/environments/service/registry.py +9 -0
- synth_ai/environments/stateful/__init__.py +1 -0
- synth_ai/environments/stateful/core.py +28 -0
- synth_ai/environments/stateful/engine.py +21 -0
- synth_ai/environments/stateful/state.py +7 -0
- synth_ai/environments/tasks/api.py +19 -0
- synth_ai/environments/tasks/core.py +78 -0
- synth_ai/environments/tasks/filters.py +39 -0
- synth_ai/environments/tasks/utils.py +89 -0
- synth_ai/environments/v0_observability/history.py +3 -0
- synth_ai/environments/v0_observability/log.py +2 -0
- synth_ai/lm/caching/constants.py +1 -0
- synth_ai/{zyk/lms → lm}/caching/ephemeral.py +4 -8
- synth_ai/{zyk/lms → lm}/caching/handler.py +15 -15
- synth_ai/{zyk/lms → lm}/caching/initialize.py +2 -4
- synth_ai/{zyk/lms → lm}/caching/persistent.py +4 -10
- synth_ai/{zyk/lms → lm}/config.py +2 -1
- synth_ai/{zyk/lms → lm}/constants.py +2 -2
- synth_ai/{zyk/lms → lm}/core/all.py +10 -10
- synth_ai/{zyk/lms → lm}/core/main.py +57 -33
- synth_ai/{zyk/lms → lm}/core/vendor_clients.py +12 -10
- synth_ai/lm/cost/monitor.py +1 -0
- synth_ai/lm/cost/statefulness.py +1 -0
- synth_ai/lm/provider_support/__init__.py +8 -0
- synth_ai/lm/provider_support/anthropic.py +945 -0
- synth_ai/lm/provider_support/openai.py +1115 -0
- synth_ai/lm/provider_support/suppress_logging.py +31 -0
- synth_ai/{zyk/lms → lm}/structured_outputs/handler.py +58 -80
- synth_ai/{zyk/lms → lm}/structured_outputs/inject.py +6 -20
- synth_ai/{zyk/lms → lm}/structured_outputs/rehabilitate.py +6 -12
- synth_ai/{zyk/lms → lm}/vendors/core/anthropic_api.py +21 -30
- synth_ai/{zyk/lms → lm}/vendors/core/gemini_api.py +35 -32
- synth_ai/{zyk/lms → lm}/vendors/core/mistral_api.py +19 -28
- synth_ai/{zyk/lms → lm}/vendors/core/openai_api.py +26 -36
- synth_ai/{zyk/lms → lm}/vendors/openai_standard.py +29 -33
- synth_ai/{zyk/lms → lm}/vendors/retries.py +1 -1
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/{zyk/lms → lm}/vendors/supported/custom_endpoint.py +131 -118
- synth_ai/{zyk/lms → lm}/vendors/supported/deepseek.py +4 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/grok.py +6 -8
- synth_ai/{zyk/lms → lm}/vendors/supported/groq.py +1 -1
- synth_ai/{zyk/lms → lm}/vendors/supported/ollama.py +2 -2
- synth_ai/{zyk/lms → lm}/vendors/supported/openrouter.py +18 -16
- synth_ai/{zyk/lms → lm}/vendors/supported/together.py +1 -1
- synth_ai/tracing/__init__.py +0 -0
- synth_ai/tracing/abstractions.py +224 -0
- synth_ai/tracing/base_client.py +91 -0
- synth_ai/tracing/client_manager.py +131 -0
- synth_ai/tracing/config.py +140 -0
- synth_ai/tracing/context.py +146 -0
- synth_ai/tracing/decorators.py +679 -0
- synth_ai/tracing/events/__init__.py +0 -0
- synth_ai/tracing/events/manage.py +147 -0
- synth_ai/tracing/events/scope.py +86 -0
- synth_ai/tracing/events/store.py +227 -0
- synth_ai/tracing/immediate_client.py +152 -0
- synth_ai/tracing/local.py +18 -0
- synth_ai/tracing/log_client_base.py +74 -0
- synth_ai/tracing/retry_queue.py +187 -0
- synth_ai/tracing/trackers.py +515 -0
- synth_ai/tracing/upload.py +504 -0
- synth_ai/tracing/utils.py +9 -0
- synth_ai/zyk/__init__.py +28 -2
- synth_ai-0.2.1.dev0.dist-info/METADATA +349 -0
- synth_ai-0.2.1.dev0.dist-info/RECORD +261 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/WHEEL +1 -1
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai-0.2.0.dist-info/METADATA +0 -36
- synth_ai-0.2.0.dist-info/RECORD +0 -50
- /synth_ai/{zyk/lms/__init__.py → environments/reproducibility/helpers.py} +0 -0
- /synth_ai/{zyk/lms/caching → lm}/__init__.py +0 -0
- /synth_ai/{zyk/lms/core → lm/caching}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/caching/dbs.py +0 -0
- /synth_ai/{zyk/lms/cost → lm/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/core/exceptions.py +0 -0
- /synth_ai/{zyk/lms/structured_outputs → lm/cost}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors → lm/structured_outputs}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/tools/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/core → lm/vendors}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/base.py +0 -0
- /synth_ai/{zyk/lms/vendors/local → lm/vendors/core}/__init__.py +0 -0
- /synth_ai/{zyk/lms/vendors/supported → lm/vendors/local}/__init__.py +0 -0
- /synth_ai/{zyk/lms → lm}/vendors/local/ollama.py +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info/licenses}/LICENSE +0 -0
- {synth_ai-0.2.0.dist-info → synth_ai-0.2.1.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,283 @@
|
|
1
|
+
"""
|
2
|
+
Battle & Combat Reward Components
|
3
|
+
|
4
|
+
Rewards for battle victories, combat strategy, and battle efficiency.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any, Set
|
9
|
+
|
10
|
+
|
11
|
+
class WildPokemonDefeatedReward(RewardComponent):
    """Grant +3 points each time a battle is exited with a victory.

    The state carries no flag separating wild encounters from trainer
    battles, so every battle that ends in victory is rewarded here.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 on the step where a battle ends in victory, else 0.0.

        Args:
            state: Current game state. ``"in_battle"`` is required;
                ``"battle_outcome"`` is optional and 1 is treated as a win.
            action: Step metadata. ``"prev_in_battle"`` says whether the
                previous step was inside a battle (defaults to False).
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        # Only the transition "in battle -> out of battle" with outcome 1 pays out.
        if not was_fighting or still_fighting or outcome != 1:
            return 0.0
        return 3.0
|
25
|
+
|
26
|
+
|
27
|
+
class TrainerBattleVictoryReward(RewardComponent):
    """Grant +15 points for a battle victory at a location not rewarded before.

    The player's ``(player_x, player_y, map_id)`` at the moment the battle
    ends is used as a stand-in identifier for the trainer.
    """

    def __init__(self):
        # Location keys (x, y, map_id) that have already paid out.
        self.trainers_defeated: Set[tuple] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 15.0 for the first victory seen at a location, else 0.0.

        Args:
            state: Current game state. ``"in_battle"`` is required;
                ``"player_x"``, ``"player_y"`` and ``"map_id"`` are required
                when a victory is detected.
            action: Step metadata; ``"prev_in_battle"`` defaults to False.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        if not was_fighting or still_fighting or outcome != 1:
            return 0.0

        location = (state["player_x"], state["player_y"], state["map_id"])
        if location in self.trainers_defeated:
            # This spot has already been rewarded once.
            return 0.0

        self.trainers_defeated.add(location)
        return 15.0
|
46
|
+
|
47
|
+
|
48
|
+
class GymLeaderVictoryReward(RewardComponent):
    """Grant +100 points for the first battle victory on each gym map."""

    def __init__(self):
        # Gym map ids that have already paid out.
        self.gym_leaders_defeated: Set[int] = set()
        # Hard-coded gym map ids 20..27 (meant to come from game data).
        self.gym_maps = set(range(20, 28))

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 for the first victory on a gym map, else 0.0.

        Args:
            state: Current game state. ``"in_battle"`` and ``"map_id"`` are
                required; ``"battle_outcome"`` is optional (1 means victory).
            action: Step metadata; ``"prev_in_battle"`` defaults to False.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        map_id = state["map_id"]

        if not was_fighting or still_fighting or outcome != 1:
            return 0.0
        if map_id not in self.gym_maps:
            return 0.0
        if map_id in self.gym_leaders_defeated:
            # Each gym map is rewarded at most once.
            return 0.0

        self.gym_leaders_defeated.add(map_id)
        return 100.0
|
73
|
+
|
74
|
+
|
75
|
+
class EliteFourMemberVictoryReward(RewardComponent):
    """Grant +200 points for the first battle victory in each Elite Four room."""

    def __init__(self):
        # Elite Four room ids that have already paid out.
        self.elite_four_defeated: Set[int] = set()
        # Hard-coded room ids 100..103 (meant to come from game data).
        self.elite_four_maps = set(range(100, 104))

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 200.0 for the first victory in an Elite Four room, else 0.0.

        Args:
            state: Current game state. ``"in_battle"`` and ``"map_id"`` are
                required; ``"battle_outcome"`` is optional (1 means victory).
            action: Step metadata; ``"prev_in_battle"`` defaults to False.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        room_id = state["map_id"]

        if not was_fighting or still_fighting or outcome != 1:
            return 0.0
        if room_id not in self.elite_four_maps:
            return 0.0
        if room_id in self.elite_four_defeated:
            # Each room is rewarded at most once.
            return 0.0

        self.elite_four_defeated.add(room_id)
        return 200.0
|
100
|
+
|
101
|
+
|
102
|
+
class ChampionVictoryReward(RewardComponent):
    """Grant a one-time +500 points for a battle victory in the Champion room."""

    def __init__(self):
        # Set to True once the reward has been paid.
        self.champion_defeated = False
        # Champion room map id.
        self.champion_map = 104

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 500.0 once, on the first victory in the Champion room.

        After the reward has been paid the method returns 0.0 without
        reading ``state`` or ``action``.

        Args:
            state: Current game state. ``"in_battle"`` and ``"map_id"`` are
                required; ``"battle_outcome"`` is optional (1 means victory).
            action: Step metadata; ``"prev_in_battle"`` defaults to False.
        """
        if self.champion_defeated:
            return 0.0

        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)
        room_id = state["map_id"]

        if not was_fighting or still_fighting or outcome != 1:
            return 0.0
        if room_id != self.champion_map:
            return 0.0

        self.champion_defeated = True
        return 500.0
|
128
|
+
|
129
|
+
|
130
|
+
class TypeAdvantageUsageReward(RewardComponent):
    """Reward for using super effective moves - +5 points

    Placeholder implementation: effectiveness is decided from a tiny
    hard-coded chart, using the ``"move_used"`` and ``"opponent_type"``
    entries of the action dict. A real implementation would need battle-log
    analysis of move types versus opponent types.
    """

    # Simplified chart of (move type, opponent type) pairs that count as
    # super effective. Kept at class level so it is built once rather than
    # on every call. Add more type matchups here.
    _SUPER_EFFECTIVE = frozenset(
        {
            ("water", "fire"),
            ("fire", "grass"),
            ("grass", "water"),
            ("electric", "water"),
        }
    )

    def __init__(self):
        # Number of super effective moves rewarded so far.
        self.super_effective_count = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when a super effective move is used in battle, else 0.0.

        Args:
            state: Current game state; ``"in_battle"`` is required.
            action: Step metadata; ``"move_used"`` and ``"opponent_type"``
                are optional and both must be truthy for a reward.
        """
        if not state["in_battle"]:
            return 0.0

        move_used = action.get("move_used")
        opponent_type = action.get("opponent_type")
        if not (move_used and opponent_type):
            return 0.0

        if self._is_super_effective(move_used, opponent_type):
            # Keep the counter in step with the rewards actually granted;
            # previously it was initialised but never updated.
            self.super_effective_count += 1
            return 5.0
        return 0.0

    def _is_super_effective(self, move_type: str, opponent_type: str) -> bool:
        """Check if move is super effective against opponent"""
        return (move_type, opponent_type) in self._SUPER_EFFECTIVE
|
161
|
+
|
162
|
+
|
163
|
+
class CriticalHitReward(RewardComponent):
    """Grant +3 points when a critical hit is reported during a battle.

    Placeholder: relies on the caller putting a ``"critical_hit"`` flag in
    the action dict; detecting it for real would need battle-log analysis.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 if in battle and ``action["critical_hit"]`` is truthy.

        Args:
            state: Current game state; ``"in_battle"`` is required.
            action: Step metadata; ``"critical_hit"`` defaults to False.
        """
        if not state["in_battle"]:
            return 0.0
        return 3.0 if action.get("critical_hit", False) else 0.0
|
174
|
+
|
175
|
+
|
176
|
+
class StatusEffectUsageReward(RewardComponent):
    """Grant +5 points when a recognised status effect is applied in battle.

    Placeholder: relies on the caller reporting the effect through
    ``action["status_applied"]``; real status tracking is not implemented.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 if in battle and a recognised status was applied.

        Args:
            state: Current game state; ``"in_battle"`` is required.
            action: Step metadata; ``"status_applied"`` is optional and is
                compared against the five recognised status names.
        """
        if not state["in_battle"]:
            return 0.0

        recognised = ("paralysis", "poison", "sleep", "burn", "freeze")
        applied = action.get("status_applied")
        return 5.0 if applied in recognised else 0.0
|
187
|
+
|
188
|
+
|
189
|
+
class OHKOReward(RewardComponent):
    """Grant +10 points for defeating an opponent with a single hit.

    Placeholder: relies on ``"opponent_defeated"``, ``"damage_dealt"`` and
    ``"opponent_max_hp"`` being supplied in the action dict; real damage
    tracking is not implemented.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 when the opponent fell to damage >= its max HP.

        Args:
            state: Current game state; ``"in_battle"`` is required.
            action: Step metadata. ``"opponent_defeated"`` defaults to
                False, ``"damage_dealt"`` to 0 and ``"opponent_max_hp"``
                to 100.
        """
        if not state["in_battle"]:
            return 0.0

        knocked_out = action.get("opponent_defeated", False)
        damage = action.get("damage_dealt", 0)
        max_hp = action.get("opponent_max_hp", 100)

        # One-hit KO: the opponent went down and the hit covered its full HP.
        if knocked_out and damage >= max_hp:
            return 10.0
        return 0.0
|
204
|
+
|
205
|
+
|
206
|
+
class FlawlessVictoryReward(RewardComponent):
    """Reward for winning without taking damage - +20 points

    The HP of every party member is recorded on the step a battle starts
    and compared against the party's HP on the step the battle ends in
    victory.
    """

    def __init__(self):
        # Party index -> hp_current recorded at battle start.
        self.battle_start_hp: Dict[int, int] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 when a battle is won with no party member losing HP.

        Args:
            state: Current game state. ``"in_battle"`` is required;
                ``"battle_outcome"`` (1 means victory) and ``"party"`` (a
                list of dicts with ``"hp_current"``) are optional.
            action: Step metadata; ``"prev_in_battle"`` defaults to False.

        Returns:
            20.0 on a flawless victory, otherwise 0.0. No reward is given
            when the start of the battle was never observed, because there
            is then no baseline to compare against.
        """
        prev_in_battle = action.get("prev_in_battle", False)
        current_in_battle = state["in_battle"]
        battle_outcome = state.get("battle_outcome", 0)

        if not prev_in_battle and current_in_battle:
            # Battle started: drop any stale entries, then record current HP.
            self.battle_start_hp.clear()
            for i, pokemon in enumerate(state.get("party", [])):
                self.battle_start_hp[i] = pokemon.get("hp_current", 0)
            return 0.0

        if prev_in_battle and not current_in_battle:
            # Battle ended. Take the baseline and always reset tracking, so a
            # loss or escape cannot leak HP values into a later battle.
            baseline = dict(self.battle_start_hp)
            self.battle_start_hp.clear()

            if battle_outcome != 1:
                return 0.0
            if not baseline:
                # Without a recorded baseline every start HP would default to
                # 0 and any victory would wrongly count as flawless.
                return 0.0

            party = state.get("party", [])
            flawless = all(
                pokemon.get("hp_current", 0) >= baseline.get(i, 0)
                for i, pokemon in enumerate(party)
            )
            if flawless:
                return 20.0

        return 0.0
|
243
|
+
|
244
|
+
|
245
|
+
class UnderleveledVictoryReward(RewardComponent):
    """Grant +25 points for winning a battle while 5 or more levels lower."""

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 on a victory where the opponent was 5+ levels higher.

        Args:
            state: Current game state. ``"in_battle"`` is required;
                ``"battle_outcome"`` is optional (1 means victory).
            action: Step metadata. ``"prev_in_battle"`` defaults to False;
                ``"player_pokemon_level"`` and ``"opponent_pokemon_level"``
                both default to 0.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        if not was_fighting or still_fighting or outcome != 1:
            return 0.0

        own_level = action.get("player_pokemon_level", 0)
        foe_level = action.get("opponent_pokemon_level", 0)
        level_gap = foe_level - own_level
        return 25.0 if level_gap >= 5 else 0.0
|
262
|
+
|
263
|
+
|
264
|
+
class BattleStreakReward(RewardComponent):
    """Grant +5 points for each battle won, tracking the running win streak.

    The streak counter grows by one per victory and is reset to zero when
    a battle ends with any other outcome. The reward itself is a flat 5.0
    per win and does not scale with the streak length.
    """

    def __init__(self):
        # Number of consecutive victories so far.
        self.current_streak = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 on the step a battle ends in victory, else 0.0.

        Args:
            state: Current game state. ``"in_battle"`` is required;
                ``"battle_outcome"`` is optional (1 means victory).
            action: Step metadata; ``"prev_in_battle"`` defaults to False.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        battle_just_ended = was_fighting and not still_fighting
        if not battle_just_ended:
            return 0.0

        if outcome == 1:
            self.current_streak += 1
            return 5.0

        # Loss or any other outcome breaks the streak.
        self.current_streak = 0
        return 0.0
|
@@ -0,0 +1,149 @@
|
|
1
|
+
"""
|
2
|
+
Composite & Milestone Reward Components
|
3
|
+
|
4
|
+
Rewards for achievement combinations and progression streaks.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any, Set
|
9
|
+
|
10
|
+
|
11
|
+
class PerfectGymRunReward(RewardComponent):
    """Reward for defeating gym without losing any Pokemon - +200 points

    The party is snapshotted when the player steps onto a gym map from a
    non-gym map. The reward is paid, once per gym map, on the first battle
    victory on that map where no party member's ``hp_current`` is below
    its snapshotted value.
    """

    def __init__(self):
        # Hard-coded gym map ids.
        self.gym_maps = {20, 21, 22, 23, 24, 25, 26, 27}
        # Gym map ids that have already paid out.
        self.perfect_gyms: Set[int] = set()
        # Gym map id -> copy of the party as it was on gym entry.
        self.gym_start_party_state: Dict[int, list] = {}

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 200.0 for a battle victory in a gym with no HP lost.

        Args:
            state: Current game state. ``"map_id"`` is required;
                ``"in_battle"`` is required while on a gym map;
                ``"battle_outcome"`` (1 means victory) and ``"party"`` (a
                list of dicts with ``"hp_current"``) are optional.
            action: Step metadata; ``"prev_map_id"`` defaults to -1 and
                ``"prev_in_battle"`` to False.
        """
        current_map = state["map_id"]
        if current_map not in self.gym_maps:
            return 0.0

        # Track gym entry.
        prev_map = action.get("prev_map_id", -1)
        if prev_map not in self.gym_maps:
            # Store copies of each party entry, not the live list: if the
            # caller mutates the party in place, a stored reference would
            # always compare equal to the current party.
            self.gym_start_party_state[current_map] = [
                dict(pokemon) for pokemon in state.get("party", [])
            ]

        # Check for gym completion.
        prev_in_battle = action.get("prev_in_battle", False)
        current_in_battle = state["in_battle"]
        battle_outcome = state.get("battle_outcome", 0)

        won_here = (
            prev_in_battle
            and not current_in_battle
            and battle_outcome == 1
            and current_map not in self.perfect_gyms
        )
        if not won_here:
            return 0.0

        start_party = self.gym_start_party_state.get(current_map, [])
        current_party = state.get("party", [])

        # NOTE(review): when no snapshot exists for this map the zip is empty
        # and the run counts as perfect - confirm that is intended.
        perfect = all(
            current_pkmn.get("hp_current", 0) >= start_pkmn.get("hp_current", 0)
            for start_pkmn, current_pkmn in zip(start_party, current_party)
        )
        if perfect:
            self.perfect_gyms.add(current_map)
            return 200.0

        return 0.0
|
56
|
+
|
57
|
+
|
58
|
+
class AreaMasteryReward(RewardComponent):
    """Intended to grant +100 points for fully completing an area.

    Not implemented yet: scoring always yields 0.0 because it would need
    comprehensive area tracking.
    """

    def __init__(self):
        # Area ids considered mastered; nothing populates this yet.
        self.mastered_areas: Set[int] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 0.0 unconditionally (placeholder); both arguments are ignored."""
        return 0.0
|
67
|
+
|
68
|
+
|
69
|
+
class SpeedrunMilestonesReward(RewardComponent):
    """Reward for reaching story points within time limits - +50 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    def __init__(self):
        # Presumably names of milestones already rewarded; nothing in this
        # class populates the set yet.
        self.milestones_reached: Set[str] = set()

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would need time tracking
        return 0.0
|
78
|
+
|
79
|
+
|
80
|
+
class ExplorationStreakReward(RewardComponent):
    """Reward for consecutive new area discoveries - +2 points per consecutive area

    An area counts as newly discovered when the current map differs from both
    the previous step's map and the last area this component rewarded. The
    streak is cleared on any step where the map did not change.
    """

    def __init__(self):
        # Number of discoveries in the current streak.
        self.streak = 0
        # Map id of the most recently rewarded area (-1 before the first one).
        self.last_area = -1

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return ``2.0 * streak`` on a discovery step, otherwise 0.0.

        Args:
            state: Current observation; ``map_id`` is required.
            action: Step context; ``prev_map_id`` is optional (defaults to -1).

        Raises:
            KeyError: If ``state`` has no ``map_id``.
        """
        here = state["map_id"]
        came_from = action.get("prev_map_id", -1)

        if here == came_from:
            # Stayed in same area - reset streak
            self.streak = 0
            return 0.0

        if here == self.last_area:
            # Map changed, but back to the area rewarded last: no reward and
            # the streak is left as it is.
            return 0.0

        # New area discovered
        self.streak += 1
        self.last_area = here
        return 2.0 * self.streak
|
101
|
+
|
102
|
+
|
103
|
+
class BattleWinStreakReward(RewardComponent):
    """Reward for consecutive battle wins - +3 points per consecutive win

    A battle is considered finished on the step where ``prev_in_battle`` was
    truthy and ``in_battle`` no longer is. An outcome of 1 extends the
    streak; any other outcome clears it.
    """

    def __init__(self):
        # Number of battles won in a row so far.
        self.win_streak = 0

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return ``3.0 * win_streak`` on a winning step, otherwise 0.0.

        Args:
            state: Current observation; ``in_battle`` is required and
                ``battle_outcome`` is optional (defaults to 0).
            action: Step context; ``prev_in_battle`` is optional.

        Raises:
            KeyError: If ``state`` has no ``in_battle``.
        """
        was_fighting = action.get("prev_in_battle", False)
        still_fighting = state["in_battle"]
        outcome = state.get("battle_outcome", 0)

        battle_just_ended = was_fighting and not still_fighting
        if not battle_just_ended:
            return 0.0

        if outcome == 1:  # Victory
            self.win_streak += 1
            return 3.0 * self.win_streak

        # Loss
        self.win_streak = 0
        return 0.0
|
122
|
+
|
123
|
+
|
124
|
+
class PerfectDayReward(RewardComponent):
    """Reward for a session with no Pokemon fainting - +100 points

    The session end is approximated by ``step_count`` reaching 100; the
    reward is paid at most once per component instance.
    """

    def __init__(self):
        # Set once the bonus has been paid; later calls return 0.0.
        self.perfect_day_achieved = False
        # Latched to True the first time a party member is seen at 0 HP.
        self.any_pokemon_fainted = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 100.0 once the session length is reached with no faint seen.

        Args:
            state: Current observation; reads optional ``party`` and
                ``step_count``.
            action: Step context (unused).
        """
        if self.perfect_day_achieved:
            return 0.0

        # Check if any Pokemon fainted
        roster = state.get("party", [])
        if any(member.get("hp_current", 1) == 0 for member in roster):
            self.any_pokemon_fainted = True

        # Check for end of session (would need session detection)
        # Placeholder implementation
        steps_taken = state.get("step_count", 0)
        reached_session_end = steps_taken >= 100  # Example session length
        if reached_session_end and not self.any_pokemon_fainted:
            self.perfect_day_achieved = True
            return 100.0

        return 0.0
|
@@ -0,0 +1,137 @@
|
|
1
|
+
"""
|
2
|
+
Economy & Resource Management Reward Components
|
3
|
+
|
4
|
+
Rewards for money management and inventory optimization.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any, Set
|
9
|
+
|
10
|
+
|
11
|
+
class FirstEarningsReward(RewardComponent):
    """Reward for earning first money from battles - +10 points

    Fires once, on the first step where money rises from a previous value
    of exactly 0.
    """

    def __init__(self):
        # True after the one-time reward has been granted.
        self.first_earnings = False

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 the first time money increases from zero, else 0.0.

        Args:
            state: Current observation; reads optional ``money``.
            action: Step context; reads optional ``prev_money``.
        """
        if self.first_earnings:
            return 0.0

        money_before = action.get("prev_money", 0)
        money_now = state.get("money", 0)

        earned_from_zero = money_now > money_before and money_before == 0
        if not earned_from_zero:
            return 0.0

        self.first_earnings = True
        return 10.0
|
28
|
+
|
29
|
+
|
30
|
+
class WealthMilestonesReward(RewardComponent):
    """Reward for reaching money milestones - +25 points

    Each milestone pays out once; several milestones crossed in the same
    step are all paid in that step.
    """

    def __init__(self):
        # Milestones that have already been paid out.
        self.milestones_reached: Set[int] = set()
        # Money thresholds, checked in this order.
        self.milestones = [1000, 5000, 10000, 50000]

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 25.0 for every milestone newly reached on this step.

        Args:
            state: Current observation; reads optional ``money``.
            action: Step context (unused).
        """
        balance = state.get("money", 0)

        # Lazy filter: membership is re-checked as thresholds are recorded.
        newly_reached = (
            threshold
            for threshold in self.milestones
            if balance >= threshold and threshold not in self.milestones_reached
        )

        reward = 0.0
        for threshold in newly_reached:
            self.milestones_reached.add(threshold)
            reward += 25.0
        return reward
|
47
|
+
|
48
|
+
|
49
|
+
class SmartPurchasesReward(RewardComponent):
    """Reward for buying useful items - +10 points

    A purchase is inferred from money going down; the reward is granted when
    an item id from ``useful_items`` appears in the inventory that was not
    present before.
    """

    def __init__(self):
        self.useful_items = {4, 5, 6, 10, 11, 12}  # Pokeballs, Potions

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 10.0 when a useful item id is newly acquired with a spend.

        Args:
            state: Current observation; reads optional ``inventory`` and
                ``money``.
            action: Step context; reads optional ``prev_inventory`` and
                ``prev_money``.
        """
        inventory_before = action.get("prev_inventory", [])
        inventory_after = state.get("inventory", [])
        money_before = action.get("prev_money", 0)
        money_after = state.get("money", 0)

        # Money decreased (purchase made)
        spent_money = money_after < money_before
        if not spent_money:
            return 0.0

        ids_before = {entry.get("item_id") for entry in inventory_before}
        ids_after = {entry.get("item_id") for entry in inventory_after}
        gained_ids = ids_after - ids_before

        if any(item_id in self.useful_items for item_id in gained_ids):
            return 10.0
        return 0.0
|
71
|
+
|
72
|
+
|
73
|
+
class RarePurchaseReward(RewardComponent):
    """Reward for buying expensive items - +20 points

    Requires money to drop by more than 1000 in one step and an item id from
    ``expensive_items`` to appear that was not in the previous inventory.
    """

    def __init__(self):
        self.expensive_items = {50, 51, 52}  # TMs, evolution stones

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 20.0 when an expensive item id is newly acquired, else 0.0.

        Args:
            state: Current observation; reads optional ``inventory`` and
                ``money``.
            action: Step context; reads optional ``prev_inventory`` and
                ``prev_money``.
        """
        inventory_before = action.get("prev_inventory", [])
        inventory_after = state.get("inventory", [])
        money_before = action.get("prev_money", 0)
        money_after = state.get("money", 0)

        # Money decreased significantly (expensive purchase)
        big_spend = money_after < money_before - 1000
        if not big_spend:
            return 0.0

        ids_before = {entry.get("item_id") for entry in inventory_before}
        ids_after = {entry.get("item_id") for entry in inventory_after}
        gained_ids = ids_after - ids_before

        if any(item_id in self.expensive_items for item_id in gained_ids):
            return 20.0
        return 0.0
|
95
|
+
|
96
|
+
|
97
|
+
class InventoryOrganizationReward(RewardComponent):
    """Reward for effective bag management - +5 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # This would need inventory management tracking
        # Placeholder implementation
        return 0.0
|
104
|
+
|
105
|
+
|
106
|
+
class HealingItemUsageReward(RewardComponent):
    """Reward for timely use of potions/healing items - +3 points

    Item use is inferred from the inventory list getting shorter; the reward
    is granted if any party member is then below half of its maximum HP.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 3.0 when an item was consumed while a Pokemon has low HP.

        Args:
            state: Current observation; reads optional ``party`` and
                ``inventory``.
            action: Step context; reads optional ``prev_inventory``.
        """
        # Check for healing item usage when Pokemon HP is low
        roster = state.get("party", [])
        bag_before = action.get("prev_inventory", [])
        bag_after = state.get("inventory", [])

        # Item count decreased (item used)
        if len(bag_after) >= len(bag_before):
            return 0.0

        for member in roster:
            hp_now = member.get("hp_current", 0)
            # Clamp the denominator to at least 1 to avoid dividing by zero.
            hp_cap = max(member.get("hp_max", 1), 1)
            if hp_now / hp_cap < 0.5:  # Low HP
                return 3.0
        return 0.0
|
122
|
+
|
123
|
+
|
124
|
+
class PokeballEfficiencyReward(RewardComponent):
    """Reward for successful captures - +5 points

    A capture is inferred from the party growing while the inventory list
    gets shorter within the same step.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return 5.0 when the party grew and the inventory shrank, else 0.0.

        Args:
            state: Current observation; reads optional ``party`` and
                ``inventory``.
            action: Step context; reads optional ``prev_party`` and
                ``prev_inventory``.
        """
        # Check for Pokemon capture (party size increase + pokeball usage)
        party_size_before = len(action.get("prev_party", []))
        party_size_after = len(state.get("party", []))
        bag_before = action.get("prev_inventory", [])
        bag_after = state.get("inventory", [])

        if party_size_after <= party_size_before:
            return 0.0

        # Pokemon captured and pokeball used
        return 5.0 if len(bag_after) < len(bag_before) else 0.0
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""
|
2
|
+
Efficiency & Optimization Reward Components
|
3
|
+
|
4
|
+
Rewards for optimal play, routing, and game knowledge.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from synth_ai.environments.environment.rewards.core import RewardComponent
|
8
|
+
from typing import Dict, Any
|
9
|
+
|
10
|
+
|
11
|
+
class FastTravelUsageReward(RewardComponent):
    """Reward for using Fly effectively - +10 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would detect Fly usage
        return 0.0
|
17
|
+
|
18
|
+
|
19
|
+
class OptimalRoutingReward(RewardComponent):
    """Reward for taking efficient paths - +15 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would analyze path efficiency
        return 0.0
|
25
|
+
|
26
|
+
|
27
|
+
class PuzzleSolvingReward(RewardComponent):
    """Reward for solving puzzles quickly - +25 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would detect puzzle completion
        return 0.0
|
33
|
+
|
34
|
+
|
35
|
+
class MoveEffectivenessReward(RewardComponent):
    """Reward for consistently using type advantages - +8 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would track type effectiveness usage
        return 0.0
|
41
|
+
|
42
|
+
|
43
|
+
class EvolutionTimingReward(RewardComponent):
    """Reward for evolving Pokemon at optimal times - +15 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would analyze evolution timing
        return 0.0
|
49
|
+
|
50
|
+
|
51
|
+
class HMUsageReward(RewardComponent):
    """Reward for using HMs in appropriate situations - +10 points

    Not implemented yet: ``score`` returns 0.0 on every call.
    """

    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
        """Return the reward for this step; currently always 0.0.

        Args:
            state: Current observation (unused).
            action: Step context (unused).
        """
        # Placeholder - would detect appropriate HM usage
        return 0.0
|