PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show

examples/analyze_semantic_words.sh +2 -2
examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +6 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -38
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +288 -39
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
synth_ai/api/train/builders.py +99 -4
synth_ai/api/train/cli.py +516 -26
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +23 -2
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +61 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/auth/credentials.py +119 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +94 -18
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1112 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +200 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/validation.py +386 -0
synth_ai/cli/demo.py +30 -158
synth_ai/cli/deploy/__init__.py +43 -0
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +51 -1480
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -10
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/learning/rl/client.py +3 -1
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +518 -0
synth_ai/streaming/streamer.py +320 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +45 -9
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +40 -33
synth_ai/utils/http.py +4 -1
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +285 -3
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

synth_ai/environments/examples/red/engine_helpers/reward_components.py CHANGED Viewed

@@ -3,274 +3,246 @@ from typing import Any, Dict, Set
 from synth_ai.environments.environment.rewards.core import RewardComponent
-class BadgeRewardComponent(RewardComponent):
-    """Reward for earning gym badges"""
+# ===== COMPREHENSIVE POKEMON RED PROGRESS REWARD SYSTEM =====
+# Designed for deterministic rewards that guide toward beating Brock at Pewter Gym
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_badges = action.get("prev_badges", 0)
-        current_badges = state["badges"]
-        new_badges = current_badges & ~prev_badges
-        badge_count = bin(new_badges).count("1")
-        return badge_count * 1.0
+class RouteExplorationReward(RewardComponent):
+    """High rewards for reaching key areas on the path to Pewter Gym - guides exploration"""
-class MapTransitionComponent(RewardComponent):
-    """Reward for exploring new areas"""
+    def __init__(self):
+        self.key_areas_reached: Set[int] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
         current_map = state["map_id"]
-        return 0.1 if current_map != prev_map else 0.0
+        prev_map = action.get("prev_map_id", -1)
-class BattleVictoryComponent(RewardComponent):
-    """Reward for winning battles"""
+        # Key maps and rewards for progressing toward Pewter Gym
+        area_rewards = {
+            0: 0.0,  # Pallet Town (starting point)
+            1: 2.0,  # Route 1 - First step out of town (+2.0)
+            2: 1.5,  # Viridian City - Major hub (+1.5)
+            3: 1.0,  # Route 22 - Path to League (+1.0)
+            4: 1.0,  # Route 2 - To Viridian Forest (+1.0)
+            5: 2.0,  # Viridian Forest - Dense area (+2.0)
+            6: 1.5,  # Pewter City - Target city (+1.5)
+            7: 5.0,  # Pewter Gym - GOAL AREA (+5.0 for entering gym)
+        }
+        if current_map in area_rewards and current_map not in self.key_areas_reached:
+            if prev_map != current_map:  # Only reward when actually entering new area
+                self.key_areas_reached.add(current_map)
+                return area_rewards[current_map]
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
-        battle_outcome = state["battle_outcome"]
-        # Transitioning from battle to not in battle with victory
-        if prev_in_battle and not current_in_battle and battle_outcome == 1:
-            return 0.5
         return 0.0
-class LevelUpComponent(RewardComponent):
-    """Reward for Pokemon leveling up"""
+class StrategicTrainingReward(RewardComponent):
+    """Rewards for building Pokemon strength strategically"""
+    def __init__(self):
+        self.level_milestones: Set[int] = set()
+        self.last_level = 0
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        current_level = state.get("party_level", 0)
         prev_level = action.get("prev_party_level", 0)
-        current_level = state["party_level"]
-        level_gain = max(0, current_level - prev_level)
-        return level_gain * 0.3
+        # Reward reaching key level milestones
+        milestone_rewards = {
+            8: 1.0,   # Level 8 - Good for early battles
+            12: 2.0,  # Level 12 - Ready for Brock
+            15: 3.0,  # Level 15 - Strong Pokemon
+        }
-class XPGainComponent(RewardComponent):
-    """Small reward for XP gains"""
+        if current_level > prev_level and current_level in milestone_rewards:
+            if current_level not in self.level_milestones:
+                self.level_milestones.add(current_level)
+                return milestone_rewards[current_level]
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_xp = action.get("prev_party_xp", 0)
-        current_xp = state["party_xp"]
-        xp_gain = max(0, current_xp - prev_xp)
-        return xp_gain * 0.001  # Very small multiplier
+        # Small reward for any level up (0.2 points)
+        if current_level > prev_level:
+            return 0.2
+        return 0.0
-class StepPenaltyComponent(RewardComponent):
-    """Small penalty for each step to encourage efficiency"""
-    def __init__(self, penalty: float = -0.001):
-        self.penalty = penalty
+class BattleProgressionReward(RewardComponent):
+    """Rewards for winning battles and gaining experience"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        return self.penalty
+        prev_in_battle = action.get("prev_in_battle", False)
+        current_in_battle = state.get("in_battle", False)
+        battle_outcome = state.get("battle_outcome", 0)
+        # Large reward for battle victory (+1.0)
+        if prev_in_battle and not current_in_battle and battle_outcome == 1:
+            return 1.0
-class MenuPenaltyComponent(RewardComponent):
-    """Penalty for excessive menu usage"""
+        # Small reward for entering battle (+0.1) - shows engagement
+        if not prev_in_battle and current_in_battle:
+            return 0.1
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # This would need more sophisticated menu tracking
         return 0.0
-# ===== NEW EARLY GAME PALLET TOWN REWARDS =====
-class ExitHouseReward(RewardComponent):
-    """High reward for first time leaving the starting house - +2.0 points"""
+class GymPreparationReward(RewardComponent):
+    """Rewards for preparing to challenge Brock"""
     def __init__(self):
-        self.house_exited = False
+        self.prepared_for_gym = False
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.house_exited:
+        if self.prepared_for_gym:
             return 0.0
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Check if in Pewter City area and have decent Pokemon
+        if state["map_id"] in [6, 7]:  # Pewter City or Gym
+            party_level = state.get("party_level", 0)
+            party_count = len(state.get("party", []))
+            # Reward being prepared for gym battle
+            if party_level >= 10 and party_count >= 1:
+                self.prepared_for_gym = True
+                return 3.0  # Significant reward for being gym-ready
-        # Exit from house to town (assuming house maps are 1,2 and town is 0)
-        if prev_map in [1, 2] and current_map == 0:
-            self.house_exited = True
-            return 2.0
         return 0.0
-class NPCInteractionReward(RewardComponent):
-    """Reward for talking to NPCs - +0.8 points per unique NPC"""
+class ItemCollectionReward(RewardComponent):
+    """Rewards for collecting useful items"""
     def __init__(self):
-        self.npcs_talked_to: Set[tuple] = set()
+        self.items_collected: Set[int] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect NPC conversations
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            # Use position as NPC identifier
-            npc_key = (state["player_x"], state["player_y"], state["map_id"])
-            if npc_key not in self.npcs_talked_to:
-                self.npcs_talked_to.add(npc_key)
-                return 0.8
-        return 0.0
+        prev_inventory = action.get("prev_inventory", [])
+        current_inventory = state.get("inventory", [])
+        # Check for new items
+        prev_item_ids = {item["item_id"] for item in prev_inventory}
+        current_item_ids = {item["item_id"] for item in current_inventory}
-class OakLabDiscoveryReward(RewardComponent):
-    """High reward for finding and entering Oak's lab - +2.5 points"""
+        new_items = current_item_ids - prev_item_ids
-    def __init__(self):
-        self.lab_discovered = False
+        # Reward valuable items for gym preparation
+        valuable_items = {1, 2, 3, 4, 5, 10, 11, 12, 13}  # Potions, Balls, etc.
+        reward = 0.0
+        for item_id in new_items:
+            if item_id not in self.items_collected:
+                self.items_collected.add(item_id)
+                if item_id in valuable_items:
+                    reward += 0.5  # +0.5 per valuable item
+                else:
+                    reward += 0.1  # +0.1 per other item
+        return reward
+class HealingManagementReward(RewardComponent):
+    """Rewards for keeping Pokemon healthy"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.lab_discovered:
+        prev_party = action.get("prev_party", [])
+        current_party = state.get("party", [])
+        if not prev_party or not current_party:
             return 0.0
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Reward healing Pokemon back to full health
+        prev_hp_pct = sum(p.get("hp_percentage", 0) for p in prev_party) / len(prev_party)
+        current_hp_pct = sum(p.get("hp_percentage", 0) for p in current_party) / len(current_party)
+        # Significant improvement in health
+        if current_hp_pct > prev_hp_pct + 20:  # Healed at least 20% overall
+            return 0.8
+        # Small reward for maintaining good health
+        if current_hp_pct >= 80 and prev_hp_pct >= 80:
+            return 0.05
-        # Entering Oak's lab (assuming map 3)
-        if prev_map == 0 and current_map == 3:
-            self.lab_discovered = True
-            return 2.5
         return 0.0
-class StarterPokemonReward(RewardComponent):
-    """Very high reward for getting first Pokemon - +10.0 points"""
+class EfficientExplorationReward(RewardComponent):
+    """Rewards for exploring efficiently without getting lost"""
     def __init__(self):
-        self.starter_obtained = False
+        self.positions_visited: Set[tuple] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.starter_obtained:
-            return 0.0
+        # Track unique positions visited in each map
+        position_key = (state["map_id"], state["player_x"], state["player_y"])
-        # Detect getting first Pokemon
-        prev_party_count = len(action.get("prev_party", []))
-        current_party_count = len(state.get("party", []))
+        if position_key not in self.positions_visited:
+            self.positions_visited.add(position_key)
+            return 0.02  # Small reward for discovering new areas
-        if prev_party_count == 0 and current_party_count == 1:
-            if state["map_id"] == 3:  # In Oak's lab
-                self.starter_obtained = True
-                return 10.0
         return 0.0
-class FirstBattleReward(RewardComponent):
-    """High reward for engaging in first battle - +5.0 points"""
-    def __init__(self):
-        self.first_battle = False
+class BadgeVictoryReward(RewardComponent):
+    """HUGE reward for achieving the main goal - Boulder Badge"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.first_battle:
-            return 0.0
+        prev_badges = action.get("prev_badges", 0)
+        current_badges = state.get("badges", 0)
-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
+        # Check if Boulder Badge (bit 0) was newly earned
+        boulder_badge_mask = 0x01
+        prev_has_badge = prev_badges & boulder_badge_mask
+        current_has_badge = current_badges & boulder_badge_mask
+        if not prev_has_badge and current_has_badge:
+            return 50.0  # MASSIVE reward for completing the main objective
-        if not prev_in_battle and current_in_battle:
-            self.first_battle = True
-            return 5.0
         return 0.0
-class DirectionExplorationReward(RewardComponent):
-    """Reward for trying all movement directions - +1.0 points when complete"""
+class StepPenaltyComponent(RewardComponent):
+    """Small penalty for each step to encourage efficiency"""
-    def __init__(self):
-        self.directions_tried: Set[str] = set()
-        self.reward_given = False
+    def __init__(self, penalty: float = 0.0):  # Changed from -0.005 to 0.0
+        self.penalty = penalty
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.reward_given:
-            return 0.0
+        return self.penalty
-        # Track movement directions based on position changes
-        prev_x = action.get("prev_player_x", state["player_x"])
-        prev_y = action.get("prev_player_y", state["player_y"])
-        current_x = state["player_x"]
-        current_y = state["player_y"]
-        if current_x > prev_x:
-            self.directions_tried.add("RIGHT")
-        elif current_x < prev_x:
-            self.directions_tried.add("LEFT")
-        elif current_y > prev_y:
-            self.directions_tried.add("DOWN")
-        elif current_y < prev_y:
-            self.directions_tried.add("UP")
-        if len(self.directions_tried) >= 4:
-            self.reward_given = True
-            return 1.0
-        return 0.0
+# ===== LEGACY COMPONENTS (kept for compatibility) =====
-class BuildingExplorationReward(RewardComponent):
-    """Reward for entering different buildings - +0.5 points per building"""
-    def __init__(self):
-        self.buildings_entered: Set[int] = set()
+class BadgeRewardComponent(RewardComponent):
+    """Legacy badge reward - now handled by BadgeVictoryReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        return 0.0  # Handled by BadgeVictoryReward
-        # Entering a new building from town
-        if (
-            prev_map == 0 and current_map > 0 and current_map not in [1, 2]
-        ):  # From town to new building
-            if current_map not in self.buildings_entered:
-                self.buildings_entered.add(current_map)
-                return 0.5
-        return 0.0
+class MapTransitionComponent(RewardComponent):
+    """Legacy map transition - now handled by RouteExplorationReward"""
+    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        return 0.0  # Handled by RouteExplorationReward
-class ObjectInteractionReward(RewardComponent):
-    """Reward for pressing A on various objects - +0.3 points per object"""
-    def __init__(self):
-        self.objects_interacted: Set[tuple] = set()
+class BattleVictoryComponent(RewardComponent):
+    """Legacy battle victory - now handled by BattleProgressionReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect A button interactions that trigger text
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            object_key = (state["player_x"], state["player_y"], state["map_id"])
-            if object_key not in self.objects_interacted:
-                self.objects_interacted.add(object_key)
-                return 0.3
-        return 0.0
+        return 0.0  # Handled by BattleProgressionReward
-class TownExplorationReward(RewardComponent):
-    """Reward for thorough town exploration - +0.1 per new position"""
-    def __init__(self):
-        self.positions_visited: Set[tuple] = set()
+class LevelUpComponent(RewardComponent):
+    """Legacy level up - now handled by StrategicTrainingReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if state["map_id"] == 0:  # In Pallet Town
-            position_key = (state["player_x"], state["player_y"])
-            if position_key not in self.positions_visited:
-                self.positions_visited.add(position_key)
-                return 0.1
-        return 0.0
+        return 0.0  # Handled by StrategicTrainingReward
-class RouteAttemptReward(RewardComponent):
-    """Reward for trying to leave town (triggers story) - +3.0 points"""
-    def __init__(self):
-        self.route_attempted = False
+class XPGainComponent(RewardComponent):
+    """Legacy XP gain - now handled by StrategicTrainingReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.route_attempted:
-            return 0.0
-        # Detect reaching the edge of Pallet Town (attempting to go north)
-        if state["map_id"] == 0:  # In Pallet Town
-            if state["player_y"] <= 1:  # At northern edge
-                self.route_attempted = True
-                return 3.0
-        return 0.0
+        return 0.0  # Handled by StrategicTrainingReward

synth_ai/environments/examples/red/environment.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional, Union
 import base64
+import time
 from io import BytesIO
 from pydantic import BaseModel, Field
@@ -19,6 +20,8 @@ from synth_ai.environments.environment.tools import (
 )
 from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
 from synth_ai.environments.stateful.core import StatefulEnvironment
+from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
+from synth_ai.tracing_v3.session_tracer import SessionTracer
 try:  # optional for image encoding
     import numpy as _np  # type: ignore
     from PIL import Image as _PILImage  # type: ignore
@@ -121,6 +124,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
         task_instance: Optional[PokemonRedTaskInstance] = None,
         custom_step_obs: Optional[GetObservationCallable] = None,
         custom_ckpt_obs: Optional[GetObservationCallable] = None,
+        tracer: Optional[SessionTracer] = None,
     ):
         self.name = "PokemonRed"
         self.task_instance = task_instance or DEFAULT_TASK_INSTANCE
@@ -129,6 +133,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
             custom_ckpt_obs or PokemonRedObservationCallable()
         )
         self.engine = PokemonRedEngine(self.task_instance)
+        self.tracer = tracer
         # Register tools
         self._press_button_tool = PressButtonTool(self.engine)
@@ -203,6 +208,27 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
                 if tool_result.error and hasattr(pub_state, "error_info"):
                     pub_state.error_info = tool_result.error
+        # Record EnvironmentEvent for tracing if tracer is available
+        if self.tracer and hasattr(priv_state, 'reward_last_step'):
+            # Get state information for the event
+            prev_state = getattr(self.engine, '_previous_state', None)
+            terminated = getattr(priv_state, 'terminated', False)
+            truncated = getattr(priv_state, 'truncated', False)
+            # Convert states to dict for serialization
+            pub_state_dict = pub_state.__dict__ if hasattr(pub_state, '__dict__') else pub_state
+            env_event = EnvironmentEvent(
+                system_instance_id="pokemon_red_env",
+                time_record=TimeRecord(event_time=time.time()),
+                reward=float(priv_state.reward_last_step),
+                terminated=terminated,
+                truncated=truncated,
+                system_state_before=prev_state if prev_state else None,
+                system_state_after=pub_state_dict,
+            )
+            await self.tracer.record_event(env_event)
         return await self._to_observation(
             priv_state, pub_state, self.custom_step_observation_callable
         )

synth_ai/environments/examples/red/trace_hooks_v3.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Trace hooks for Pokemon Red environment - v3 version.
+Captures reward information and saves to Turso database.
+"""
+from datetime import datetime
+from typing import Any, Dict, Optional
+from synth_ai.tracing_v3.abstractions import BaseEvent, EnvironmentEvent
+from synth_ai.tracing_v3.hooks import HookManager
+# Pokemon Red achievement categories by reward value
+EXPLORATION_ACHIEVEMENTS = {
+    0.02: "explore_new_area",
+    0.04: "explore_multiple_areas",
+    1.0: "leave_starting_area",
+    1.5: "enter_new_city",
+    2.0: "explore_new_route",
+    5.0: "enter_gym_building",
+}
+TRAINING_ACHIEVEMENTS = {
+    0.2: "pokemon_level_up",
+    0.3: "reach_power_level",
+    3.0: "pokemon_ready_for_battle",
+}
+BATTLE_ACHIEVEMENTS = {
+    0.1: "encounter_wild_pokemon",
+}
+RESOURCE_ACHIEVEMENTS = {
+    0.05: "keep_pokemon_healthy",
+    0.5: "find_valuable_item",
+    0.8: "visit_pokemon_center",
+}
+MAJOR_ACHIEVEMENTS = {
+    50.0: "defeat_brock_win_badge",
+}
+async def track_pokemon_rewards(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that captures detailed Pokemon Red reward information."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+    reward = event_obj.reward
+    if reward is None or reward == 0.0:
+        return None
+    # Determine achievement type based on reward value
+    achievement_type = "unknown"
+    achievement_category = "other"
+    # Check each category
+    if reward in EXPLORATION_ACHIEVEMENTS:
+        achievement_type = EXPLORATION_ACHIEVEMENTS[reward]
+        achievement_category = "exploration"
+    elif reward in TRAINING_ACHIEVEMENTS:
+        achievement_type = TRAINING_ACHIEVEMENTS[reward]
+        achievement_category = "training"
+    elif reward in BATTLE_ACHIEVEMENTS:
+        achievement_type = BATTLE_ACHIEVEMENTS[reward]
+        achievement_category = "battle"
+    elif reward in RESOURCE_ACHIEVEMENTS:
+        achievement_type = RESOURCE_ACHIEVEMENTS[reward]
+        achievement_category = "resource"
+    elif reward in MAJOR_ACHIEVEMENTS:
+        achievement_type = MAJOR_ACHIEVEMENTS[reward]
+        achievement_category = "major"
+    return {
+        "reward_value": reward,
+        "achievement_type": achievement_type,
+        "achievement_category": achievement_category,
+        "timestamp": datetime.now().isoformat(),
+        "system_state_before": event_obj.system_state_before,
+        "system_state_after": event_obj.system_state_after,
+    }
+async def track_pokemon_milestones(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that tracks significant Pokemon Red milestones."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+    reward = event_obj.reward
+    if reward is None:
+        return None
+    # Track major milestones
+    if reward >= 1.0:  # Significant progress rewards
+        return {
+            "milestone": "major_progress",
+            "reward": reward,
+            "timestamp": datetime.now().isoformat(),
+        }
+    elif reward >= 0.5:  # Moderate rewards
+        return {
+            "milestone": "moderate_progress",
+            "reward": reward,
+            "timestamp": datetime.now().isoformat(),
+        }
+    return None
+async def track_pokemon_outcomes(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
+    """Hook that tracks episode outcomes for Pokemon Red."""
+    # Only process EnvironmentEvents
+    if not isinstance(event_obj, EnvironmentEvent):
+        return None
+    # Check for termination conditions
+    if event_obj.terminated or event_obj.truncated:
+        total_reward = getattr(event_obj, 'total_reward', 0.0)
+        steps_taken = getattr(event_obj, 'step_count', 0)
+        # Extract achievement information from system state
+        achievements_count = 0
+        if event_obj.system_state_after:
+            # Count positive rewards as achievements
+            # This is a simplified count - in practice you'd track actual achievements
+            achievements_count = max(1, int(total_reward / 0.1))  # Rough estimate
+        return {
+            "outcome_type": "episode_end",
+            "total_reward": total_reward,
+            "steps_taken": steps_taken,
+            "achievements_count": achievements_count,
+            "terminated": event_obj.terminated,
+            "truncated": event_obj.truncated,
+            "timestamp": datetime.now().isoformat(),
+        }
+    return None
+# Create the global POKEMON_RED_HOOKS instance
+POKEMON_RED_HOOKS = HookManager()
+# Register all hooks
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_rewards,
+    name="pokemon_rewards",
+    priority=10,
+    event_types=["environment"],
+)
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_milestones,
+    name="pokemon_milestones",
+    priority=5,
+    event_types=["environment"],
+)
+POKEMON_RED_HOOKS.register(
+    "event_recorded",
+    track_pokemon_outcomes,
+    name="pokemon_outcomes",
+    priority=5,
+    event_types=["environment"],
+)

synth_ai/http.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""
+Backward-compatible HTTP client exports.
+Historically, some modules imported ``synth_ai.http``. The canonical location
+is ``synth_ai.http_client``; this module simply re-exports the same symbols so
+legacy imports keep working.
+"""
+from synth_ai.http_client import AsyncHttpClient, HTTPError, sleep
+__all__ = ["AsyncHttpClient", "HTTPError", "sleep"]

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl