PyPI - synth-ai - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl - Mend

synth-ai 0.2.14 (py3-none-any.whl) → 0.2.17 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (354)

examples/README.md +1 -0
examples/analyze_semantic_words.sh +2 -2
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/SFT_README.md +147 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/multi_step/convert_traces_to_sft.py +84 -0
examples/multi_step/run_sft_qwen30b.sh +45 -0
examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -2
examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
examples/qwen_vl/QUICKSTART.md +327 -0
examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
examples/qwen_vl/README.md +152 -0
examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
examples/qwen_vl/RL_VISION_TESTING.md +333 -0
examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
examples/qwen_vl/SETUP_COMPLETE.md +274 -0
examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
examples/qwen_vl/__init__.py +2 -0
examples/qwen_vl/collect_data_via_cli.md +415 -0
examples/qwen_vl/collect_vision_traces.py +368 -0
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
examples/qwen_vl/configs/filter_vision_test.toml +8 -0
examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
examples/qwen_vl/run_vision_comparison.sh +61 -0
examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
examples/qwen_vl/test_image_validation.py +201 -0
examples/qwen_vl/test_sft_vision_data.py +110 -0
examples/rl/README.md +6 -6
examples/rl/configs/eval_base_qwen.toml +17 -0
examples/rl/configs/eval_rl_qwen.toml +13 -0
examples/rl/configs/rl_from_base_qwen.toml +62 -0
examples/rl/configs/rl_from_base_qwen17.toml +79 -0
examples/rl/configs/rl_from_ft_qwen.toml +37 -0
examples/rl/run_eval.py +436 -0
examples/rl/run_rl_and_save.py +111 -0
examples/rl/task_app/README.md +21 -0
examples/rl/task_app/math_single_step.py +990 -0
examples/rl/task_app/math_task_app.py +111 -0
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +6 -6
examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
examples/sft/evaluate.py +2 -4
examples/sft/export_dataset.py +7 -4
examples/swe/task_app/README.md +33 -3
examples/swe/task_app/grpo_swe_mini.py +4 -1
examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
examples/swe/task_app/hosted/inference/openai_client.py +4 -4
examples/swe/task_app/hosted/policy_routes.py +0 -2
examples/swe/task_app/hosted/rollout.py +0 -8
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
examples/task_apps/enron/__init__.py +1 -0
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +36 -5
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/README.md +3 -3
examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
examples/vlm/crafter_openai_vlm_agent.py +3 -5
examples/vlm/filter_image_rows.py +1 -1
examples/vlm/run_crafter_vlm_benchmark.py +2 -2
examples/warming_up_to_rl/_utils.py +92 -0
examples/warming_up_to_rl/analyze_trace_db.py +1 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
examples/warming_up_to_rl/export_trace_sft.py +174 -60
examples/warming_up_to_rl/readme.md +63 -132
examples/warming_up_to_rl/run_fft_and_save.py +1 -1
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/run_rl_and_save.py +1 -1
examples/warming_up_to_rl/task_app/README.md +42 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
synth_ai/__init__.py +44 -30
synth_ai/_utils/__init__.py +47 -0
synth_ai/_utils/base_url.py +10 -0
synth_ai/_utils/http.py +10 -0
synth_ai/_utils/prompts.py +10 -0
synth_ai/_utils/task_app_state.py +12 -0
synth_ai/_utils/user_config.py +10 -0
synth_ai/api/models/supported.py +144 -7
synth_ai/api/train/__init__.py +13 -1
synth_ai/api/train/builders.py +9 -3
synth_ai/api/train/cli.py +155 -17
synth_ai/api/train/config_finder.py +18 -11
synth_ai/api/train/configs/__init__.py +8 -1
synth_ai/api/train/configs/rl.py +32 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/env_resolver.py +13 -10
synth_ai/auth/credentials.py +119 -0
synth_ai/cli/__init__.py +61 -69
synth_ai/cli/_modal_wrapper.py +7 -5
synth_ai/cli/_typer_patch.py +0 -2
synth_ai/cli/_validate_task_app.py +22 -4
synth_ai/cli/commands/__init__.py +17 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/deploy/__init__.py +23 -0
synth_ai/cli/commands/deploy/core.py +614 -0
synth_ai/cli/commands/deploy/errors.py +72 -0
synth_ai/cli/commands/deploy/validation.py +11 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1109 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +388 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +73 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +199 -0
synth_ai/cli/commands/train/judge_validation.py +304 -0
synth_ai/cli/commands/train/validation.py +443 -0
synth_ai/cli/demo.py +2 -162
synth_ai/cli/deploy/__init__.py +28 -0
synth_ai/cli/deploy/core.py +5 -0
synth_ai/cli/deploy/errors.py +23 -0
synth_ai/cli/deploy/validation.py +5 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +3 -1
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/task_app_discovery.py +7 -0
synth_ai/cli/lib/task_app_env.py +518 -0
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/recent.py +2 -1
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +21 -0
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +7 -0
synth_ai/cli/task_app_list.py +25 -0
synth_ai/cli/task_app_modal_serve.py +11 -0
synth_ai/cli/task_app_serve.py +11 -0
synth_ai/cli/task_apps.py +110 -1499
synth_ai/cli/traces.py +1 -1
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +5 -0
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/__init__.py +10 -0
synth_ai/demos/core/__init__.py +28 -1
synth_ai/demos/crafter/__init__.py +1 -0
synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/demos/demo_registry.py +176 -0
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/math/__init__.py +1 -0
synth_ai/demos/math/_common.py +16 -0
synth_ai/demos/math/app.py +38 -0
synth_ai/demos/math/config.toml +76 -0
synth_ai/demos/math/deploy_modal.py +54 -0
synth_ai/demos/math/modal_task_app.py +702 -0
synth_ai/demos/math/task_app_entry.py +51 -0
synth_ai/environments/environment/core.py +7 -1
synth_ai/environments/examples/bandit/engine.py +0 -1
synth_ai/environments/examples/bandit/environment.py +0 -1
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/environments/examples/wordle/environment.py +0 -1
synth_ai/evals/base.py +16 -5
synth_ai/evals/client.py +1 -1
synth_ai/http.py +8 -22
synth_ai/inference/client.py +1 -1
synth_ai/judge_schemas.py +4 -5
synth_ai/learning/client.py +1 -1
synth_ai/learning/health.py +1 -1
synth_ai/learning/jobs.py +1 -1
synth_ai/learning/rl/client.py +4 -2
synth_ai/learning/rl/env_keys.py +1 -1
synth_ai/learning/rl/secrets.py +1 -1
synth_ai/learning/sft/client.py +1 -1
synth_ai/learning/sft/data.py +407 -4
synth_ai/learning/validators.py +4 -1
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +469 -0
synth_ai/streaming/streamer.py +301 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +4 -2
synth_ai/task/config.py +6 -4
synth_ai/task/rubrics/__init__.py +1 -2
synth_ai/task/rubrics/loaders.py +14 -10
synth_ai/task/rubrics.py +219 -0
synth_ai/task/trace_correlation_helpers.py +24 -11
synth_ai/task/tracing_utils.py +14 -3
synth_ai/task/validators.py +0 -1
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/config.py +15 -13
synth_ai/tracing_v3/constants.py +21 -0
synth_ai/tracing_v3/db_config.py +3 -1
synth_ai/tracing_v3/decorators.py +10 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/session_tracer.py +7 -7
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -9
synth_ai/tracing_v3/turso/native_manager.py +80 -72
synth_ai/tracing_v3/utils.py +2 -2
synth_ai/utils/__init__.py +101 -0
synth_ai/utils/base_url.py +94 -0
synth_ai/utils/cli.py +131 -0
synth_ai/utils/env.py +294 -0
synth_ai/utils/http.py +172 -0
synth_ai/utils/modal.py +308 -0
synth_ai/utils/process.py +212 -0
synth_ai/utils/prompts.py +39 -0
synth_ai/utils/sqld.py +122 -0
synth_ai/utils/task_app_discovery.py +882 -0
synth_ai/utils/task_app_env.py +186 -0
synth_ai/utils/task_app_state.py +318 -0
synth_ai/utils/user_config.py +137 -0
synth_ai/v0/config/__init__.py +1 -5
synth_ai/v0/config/base_url.py +1 -7
synth_ai/v0/tracing/config.py +1 -1
synth_ai/v0/tracing/decorators.py +1 -1
synth_ai/v0/tracing/upload.py +1 -1
synth_ai/v0/tracing_v1/config.py +1 -1
synth_ai/v0/tracing_v1/decorators.py +1 -1
synth_ai/v0/tracing_v1/upload.py +1 -1
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
synth_ai/cli/man.py +0 -106
synth_ai/cli/tui.py +0 -57
synth_ai/compound/cais.py +0 -0
synth_ai/core/experiment.py +0 -13
synth_ai/core/system.py +0 -15
synth_ai/demo_registry.py +0 -295
synth_ai/handshake.py +0 -109
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -906
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0

synth_ai/demos/math/task_app_entry.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Task app registry entry for the math demo Modal deployment."""
+from __future__ import annotations
+from contextlib import suppress
+from importlib import import_module
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
+try:
+    from synth_ai.task.apps.math_single_step import build_config as base_build_config
+except ModuleNotFoundError:
+    base_module = import_module("examples.rl.task_app.math_single_step")
+    base_build_config = base_module.build_config
+DEMO_MODAL_CONFIG = ModalDeploymentConfig(
+    app_name="hendrycks-math-task-app",
+    pip_packages=(
+        "fastapi>=0.110.0",
+        "uvicorn>=0.23.0",
+        "pydantic>=2.6.0",
+        "httpx>=0.24.0",
+        "numpy>=1.24.0",
+        "aiohttp>=3.8.0",
+        "datasets>=2.16.0",
+        "synth-ai",
+    ),
+)
+def build_config():
+    """Reuse the shared math single-step TaskAppConfig."""
+    return base_build_config()
+def register_demo_entry() -> None:
+    entry = TaskAppEntry(
+        app_id="hendrycks-math-demo",
+        description="Demo math task app (Modal-focused) shipping with synth-ai demos.",
+        config_factory=build_config,
+        modal=DEMO_MODAL_CONFIG,
+    )
+    with suppress(ValueError):
+        register_task_app(entry=entry)
+register_demo_entry()
+__all__ = ["DEMO_MODAL_CONFIG", "build_config", "register_demo_entry"]

synth_ai/environments/environment/core.py CHANGED Viewed

@@ -1,4 +1,10 @@
-from synth_ai.core.system import System
+class System:
+    """Minimal base data structure shared by environment types."""
+    id: str
+    name: str
+    description: str
+    pass
 class Environment(System):

synth_ai/environments/examples/bandit/engine.py CHANGED Viewed

@@ -4,7 +4,6 @@ from dataclasses import dataclass
 from typing import Any
 import numpy as np
 from synth_ai.environments.environment.shared_engine import (
     GetObservationCallable,
     InternalObservation,

synth_ai/environments/examples/bandit/environment.py CHANGED Viewed

@@ -3,7 +3,6 @@ from __future__ import annotations
 from typing import Any
 from pydantic import BaseModel, Field, ValidationError
 from synth_ai.environments.environment.shared_engine import (
     GetObservationCallable,
     InternalObservation,

synth_ai/environments/examples/red/engine.py CHANGED Viewed

@@ -14,12 +14,15 @@ from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngine
 from synth_ai.environments.tasks.core import TaskInstance
 from .engine_helpers.reward_components import (
-    BadgeRewardComponent,
-    BattleVictoryComponent,
-    LevelUpComponent,
-    MapTransitionComponent,
+    RouteExplorationReward,
+    StrategicTrainingReward,
+    BattleProgressionReward,
+    GymPreparationReward,
+    ItemCollectionReward,
+    HealingManagementReward,
+    EfficientExplorationReward,
+    BadgeVictoryReward,
     StepPenaltyComponent,
-    XPGainComponent,
 )
 from .engine_helpers.state_extraction import extract_game_state
@@ -268,15 +271,27 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
             # For testing purposes, use None emulator
             self.emulator = None
-        # Initialize reward stack with dense components
+        # Initialize reward stack with comprehensive progress-based components
         self.reward_stack = RewardStack(
             components=[
-                BadgeRewardComponent(),
-                MapTransitionComponent(),
-                BattleVictoryComponent(),
-                LevelUpComponent(),
-                XPGainComponent(),
-                StepPenaltyComponent(),
+                # Major progress rewards
+                BadgeVictoryReward(),        # +50.0 for Boulder Badge (main goal)
+                RouteExplorationReward(),    # +1.0-5.0 for reaching key areas
+                GymPreparationReward(),      # +3.0 for being gym-ready
+                # Training and battle rewards
+                StrategicTrainingReward(),   # +0.2-3.0 for level ups and milestones
+                BattleProgressionReward(),   # +0.1-1.0 for battles
+                # Resource management rewards
+                ItemCollectionReward(),      # +0.1-0.5 for collecting items
+                HealingManagementReward(),   # +0.05-0.8 for healing Pokemon
+                # Exploration efficiency
+                EfficientExplorationReward(), # +0.02 for discovering new positions
+                        # No penalty for unproductive actions
+                        StepPenaltyComponent(penalty=0.0),        # 0.0 per step
             ]
         )
@@ -640,6 +655,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
                         "prev_text_box_active": bool(prev_state.get("text_box_active", False)),
                         "prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
                         "prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
+                        "prev_player_x": int(prev_state.get("player_x", 0)),
+                        "prev_player_y": int(prev_state.get("player_y", 0)),
+                        "prev_party": prev_state.get("party", []),
+                        "prev_inventory": prev_state.get("inventory", []),
+                        "prev_party_hp_current": int(prev_state.get("party_hp_current", 0)),
+                        "prev_party_hp_max": int(prev_state.get("party_hp_max", 0)),
                     },
                 )
             except Exception as e:

synth_ai/environments/examples/red/engine_helpers/reward_components.py CHANGED Viewed

@@ -3,274 +3,246 @@ from typing import Any, Dict, Set
 from synth_ai.environments.environment.rewards.core import RewardComponent
-class BadgeRewardComponent(RewardComponent):
-    """Reward for earning gym badges"""
+# ===== COMPREHENSIVE POKEMON RED PROGRESS REWARD SYSTEM =====
+# Designed for deterministic rewards that guide toward beating Brock at Pewter Gym
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_badges = action.get("prev_badges", 0)
-        current_badges = state["badges"]
-        new_badges = current_badges & ~prev_badges
-        badge_count = bin(new_badges).count("1")
-        return badge_count * 1.0
+class RouteExplorationReward(RewardComponent):
+    """High rewards for reaching key areas on the path to Pewter Gym - guides exploration"""
-class MapTransitionComponent(RewardComponent):
-    """Reward for exploring new areas"""
+    def __init__(self):
+        self.key_areas_reached: Set[int] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
         current_map = state["map_id"]
-        return 0.1 if current_map != prev_map else 0.0
+        prev_map = action.get("prev_map_id", -1)
-class BattleVictoryComponent(RewardComponent):
-    """Reward for winning battles"""
+        # Key maps and rewards for progressing toward Pewter Gym
+        area_rewards = {
+            0: 0.0,  # Pallet Town (starting point)
+            1: 2.0,  # Route 1 - First step out of town (+2.0)
+            2: 1.5,  # Viridian City - Major hub (+1.5)
+            3: 1.0,  # Route 22 - Path to League (+1.0)
+            4: 1.0,  # Route 2 - To Viridian Forest (+1.0)
+            5: 2.0,  # Viridian Forest - Dense area (+2.0)
+            6: 1.5,  # Pewter City - Target city (+1.5)
+            7: 5.0,  # Pewter Gym - GOAL AREA (+5.0 for entering gym)
+        }
+        if current_map in area_rewards and current_map not in self.key_areas_reached:
+            if prev_map != current_map:  # Only reward when actually entering new area
+                self.key_areas_reached.add(current_map)
+                return area_rewards[current_map]
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
-        battle_outcome = state["battle_outcome"]
-        # Transitioning from battle to not in battle with victory
-        if prev_in_battle and not current_in_battle and battle_outcome == 1:
-            return 0.5
         return 0.0
-class LevelUpComponent(RewardComponent):
-    """Reward for Pokemon leveling up"""
+class StrategicTrainingReward(RewardComponent):
+    """Rewards for building Pokemon strength strategically"""
+    def __init__(self):
+        self.level_milestones: Set[int] = set()
+        self.last_level = 0
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        current_level = state.get("party_level", 0)
         prev_level = action.get("prev_party_level", 0)
-        current_level = state["party_level"]
-        level_gain = max(0, current_level - prev_level)
-        return level_gain * 0.3
+        # Reward reaching key level milestones
+        milestone_rewards = {
+            8: 1.0,   # Level 8 - Good for early battles
+            12: 2.0,  # Level 12 - Ready for Brock
+            15: 3.0,  # Level 15 - Strong Pokemon
+        }
-class XPGainComponent(RewardComponent):
-    """Small reward for XP gains"""
+        if current_level > prev_level and current_level in milestone_rewards:
+            if current_level not in self.level_milestones:
+                self.level_milestones.add(current_level)
+                return milestone_rewards[current_level]
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_xp = action.get("prev_party_xp", 0)
-        current_xp = state["party_xp"]
-        xp_gain = max(0, current_xp - prev_xp)
-        return xp_gain * 0.001  # Very small multiplier
+        # Small reward for any level up (0.2 points)
+        if current_level > prev_level:
+            return 0.2
+        return 0.0
-class StepPenaltyComponent(RewardComponent):
-    """Small penalty for each step to encourage efficiency"""
-    def __init__(self, penalty: float = -0.001):
-        self.penalty = penalty
+class BattleProgressionReward(RewardComponent):
+    """Rewards for winning battles and gaining experience"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        return self.penalty
+        prev_in_battle = action.get("prev_in_battle", False)
+        current_in_battle = state.get("in_battle", False)
+        battle_outcome = state.get("battle_outcome", 0)
+        # Large reward for battle victory (+1.0)
+        if prev_in_battle and not current_in_battle and battle_outcome == 1:
+            return 1.0
-class MenuPenaltyComponent(RewardComponent):
-    """Penalty for excessive menu usage"""
+        # Small reward for entering battle (+0.1) - shows engagement
+        if not prev_in_battle and current_in_battle:
+            return 0.1
-    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # This would need more sophisticated menu tracking
         return 0.0
-# ===== NEW EARLY GAME PALLET TOWN REWARDS =====
-class ExitHouseReward(RewardComponent):
-    """High reward for first time leaving the starting house - +2.0 points"""
+class GymPreparationReward(RewardComponent):
+    """Rewards for preparing to challenge Brock"""
     def __init__(self):
-        self.house_exited = False
+        self.prepared_for_gym = False
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.house_exited:
+        if self.prepared_for_gym:
             return 0.0
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Check if in Pewter City area and have decent Pokemon
+        if state["map_id"] in [6, 7]:  # Pewter City or Gym
+            party_level = state.get("party_level", 0)
+            party_count = len(state.get("party", []))
+            # Reward being prepared for gym battle
+            if party_level >= 10 and party_count >= 1:
+                self.prepared_for_gym = True
+                return 3.0  # Significant reward for being gym-ready
-        # Exit from house to town (assuming house maps are 1,2 and town is 0)
-        if prev_map in [1, 2] and current_map == 0:
-            self.house_exited = True
-            return 2.0
         return 0.0
-class NPCInteractionReward(RewardComponent):
-    """Reward for talking to NPCs - +0.8 points per unique NPC"""
+class ItemCollectionReward(RewardComponent):
+    """Rewards for collecting useful items"""
     def __init__(self):
-        self.npcs_talked_to: Set[tuple] = set()
+        self.items_collected: Set[int] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect NPC conversations
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            # Use position as NPC identifier
-            npc_key = (state["player_x"], state["player_y"], state["map_id"])
-            if npc_key not in self.npcs_talked_to:
-                self.npcs_talked_to.add(npc_key)
-                return 0.8
-        return 0.0
+        prev_inventory = action.get("prev_inventory", [])
+        current_inventory = state.get("inventory", [])
+        # Check for new items
+        prev_item_ids = {item["item_id"] for item in prev_inventory}
+        current_item_ids = {item["item_id"] for item in current_inventory}
-class OakLabDiscoveryReward(RewardComponent):
-    """High reward for finding and entering Oak's lab - +2.5 points"""
+        new_items = current_item_ids - prev_item_ids
-    def __init__(self):
-        self.lab_discovered = False
+        # Reward valuable items for gym preparation
+        valuable_items = {1, 2, 3, 4, 5, 10, 11, 12, 13}  # Potions, Balls, etc.
+        reward = 0.0
+        for item_id in new_items:
+            if item_id not in self.items_collected:
+                self.items_collected.add(item_id)
+                if item_id in valuable_items:
+                    reward += 0.5  # +0.5 per valuable item
+                else:
+                    reward += 0.1  # +0.1 per other item
+        return reward
+class HealingManagementReward(RewardComponent):
+    """Rewards for keeping Pokemon healthy"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.lab_discovered:
+        prev_party = action.get("prev_party", [])
+        current_party = state.get("party", [])
+        if not prev_party or not current_party:
             return 0.0
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        # Reward healing Pokemon back to full health
+        prev_hp_pct = sum(p.get("hp_percentage", 0) for p in prev_party) / len(prev_party)
+        current_hp_pct = sum(p.get("hp_percentage", 0) for p in current_party) / len(current_party)
+        # Significant improvement in health
+        if current_hp_pct > prev_hp_pct + 20:  # Healed at least 20% overall
+            return 0.8
+        # Small reward for maintaining good health
+        if current_hp_pct >= 80 and prev_hp_pct >= 80:
+            return 0.05
-        # Entering Oak's lab (assuming map 3)
-        if prev_map == 0 and current_map == 3:
-            self.lab_discovered = True
-            return 2.5
         return 0.0
-class StarterPokemonReward(RewardComponent):
-    """Very high reward for getting first Pokemon - +10.0 points"""
+class EfficientExplorationReward(RewardComponent):
+    """Rewards for exploring efficiently without getting lost"""
     def __init__(self):
-        self.starter_obtained = False
+        self.positions_visited: Set[tuple] = set()
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.starter_obtained:
-            return 0.0
+        # Track unique positions visited in each map
+        position_key = (state["map_id"], state["player_x"], state["player_y"])
-        # Detect getting first Pokemon
-        prev_party_count = len(action.get("prev_party", []))
-        current_party_count = len(state.get("party", []))
+        if position_key not in self.positions_visited:
+            self.positions_visited.add(position_key)
+            return 0.02  # Small reward for discovering new areas
-        if prev_party_count == 0 and current_party_count == 1:
-            if state["map_id"] == 3:  # In Oak's lab
-                self.starter_obtained = True
-                return 10.0
         return 0.0
-class FirstBattleReward(RewardComponent):
-    """High reward for engaging in first battle - +5.0 points"""
-    def __init__(self):
-        self.first_battle = False
+class BadgeVictoryReward(RewardComponent):
+    """HUGE reward for achieving the main goal - Boulder Badge"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.first_battle:
-            return 0.0
+        prev_badges = action.get("prev_badges", 0)
+        current_badges = state.get("badges", 0)
-        prev_in_battle = action.get("prev_in_battle", False)
-        current_in_battle = state["in_battle"]
+        # Check if Boulder Badge (bit 0) was newly earned
+        boulder_badge_mask = 0x01
+        prev_has_badge = prev_badges & boulder_badge_mask
+        current_has_badge = current_badges & boulder_badge_mask
+        if not prev_has_badge and current_has_badge:
+            return 50.0  # MASSIVE reward for completing the main objective
-        if not prev_in_battle and current_in_battle:
-            self.first_battle = True
-            return 5.0
         return 0.0
-class DirectionExplorationReward(RewardComponent):
-    """Reward for trying all movement directions - +1.0 points when complete"""
+class StepPenaltyComponent(RewardComponent):
+    """Small penalty for each step to encourage efficiency"""
-    def __init__(self):
-        self.directions_tried: Set[str] = set()
-        self.reward_given = False
+    def __init__(self, penalty: float = 0.0):  # Changed from -0.005 to 0.0
+        self.penalty = penalty
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.reward_given:
-            return 0.0
+        return self.penalty
-        # Track movement directions based on position changes
-        prev_x = action.get("prev_player_x", state["player_x"])
-        prev_y = action.get("prev_player_y", state["player_y"])
-        current_x = state["player_x"]
-        current_y = state["player_y"]
-        if current_x > prev_x:
-            self.directions_tried.add("RIGHT")
-        elif current_x < prev_x:
-            self.directions_tried.add("LEFT")
-        elif current_y > prev_y:
-            self.directions_tried.add("DOWN")
-        elif current_y < prev_y:
-            self.directions_tried.add("UP")
-        if len(self.directions_tried) >= 4:
-            self.reward_given = True
-            return 1.0
-        return 0.0
+# ===== LEGACY COMPONENTS (kept for compatibility) =====
-class BuildingExplorationReward(RewardComponent):
-    """Reward for entering different buildings - +0.5 points per building"""
-    def __init__(self):
-        self.buildings_entered: Set[int] = set()
+class BadgeRewardComponent(RewardComponent):
+    """Legacy badge reward - now handled by BadgeVictoryReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        prev_map = action.get("prev_map_id", -1)
-        current_map = state["map_id"]
+        return 0.0  # Handled by BadgeVictoryReward
-        # Entering a new building from town
-        if (
-            prev_map == 0 and current_map > 0 and current_map not in [1, 2]
-        ):  # From town to new building
-            if current_map not in self.buildings_entered:
-                self.buildings_entered.add(current_map)
-                return 0.5
-        return 0.0
+class MapTransitionComponent(RewardComponent):
+    """Legacy map transition - now handled by RouteExplorationReward"""
+    async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
+        return 0.0  # Handled by RouteExplorationReward
-class ObjectInteractionReward(RewardComponent):
-    """Reward for pressing A on various objects - +0.3 points per object"""
-    def __init__(self):
-        self.objects_interacted: Set[tuple] = set()
+class BattleVictoryComponent(RewardComponent):
+    """Legacy battle victory - now handled by BattleProgressionReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        # Detect A button interactions that trigger text
-        if state["text_box_active"] and not action.get("prev_text_box_active", False):
-            object_key = (state["player_x"], state["player_y"], state["map_id"])
-            if object_key not in self.objects_interacted:
-                self.objects_interacted.add(object_key)
-                return 0.3
-        return 0.0
+        return 0.0  # Handled by BattleProgressionReward
-class TownExplorationReward(RewardComponent):
-    """Reward for thorough town exploration - +0.1 per new position"""
-    def __init__(self):
-        self.positions_visited: Set[tuple] = set()
+class LevelUpComponent(RewardComponent):
+    """Legacy level up - now handled by StrategicTrainingReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if state["map_id"] == 0:  # In Pallet Town
-            position_key = (state["player_x"], state["player_y"])
-            if position_key not in self.positions_visited:
-                self.positions_visited.add(position_key)
-                return 0.1
-        return 0.0
+        return 0.0  # Handled by StrategicTrainingReward
-class RouteAttemptReward(RewardComponent):
-    """Reward for trying to leave town (triggers story) - +3.0 points"""
-    def __init__(self):
-        self.route_attempted = False
+class XPGainComponent(RewardComponent):
+    """Legacy XP gain - now handled by StrategicTrainingReward"""
     async def score(self, state: Dict[str, Any], action: Dict[str, Any]) -> float:
-        if self.route_attempted:
-            return 0.0
-        # Detect reaching the edge of Pallet Town (attempting to go north)
-        if state["map_id"] == 0:  # In Pallet Town
-            if state["player_y"] <= 1:  # At northern edge
-                self.route_attempted = True
-                return 3.0
-        return 0.0
+        return 0.0  # Handled by StrategicTrainingReward

synth_ai/environments/examples/red/environment.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional, Union
 import base64
+import time
 from io import BytesIO
 from pydantic import BaseModel, Field
@@ -19,6 +20,8 @@ from synth_ai.environments.environment.tools import (
 )
 from synth_ai.environments.reproducibility.core import ReproducibleEnvironment
 from synth_ai.environments.stateful.core import StatefulEnvironment
+from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
+from synth_ai.tracing_v3.session_tracer import SessionTracer
 try:  # optional for image encoding
     import numpy as _np  # type: ignore
     from PIL import Image as _PILImage  # type: ignore
@@ -121,6 +124,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
         task_instance: Optional[PokemonRedTaskInstance] = None,
         custom_step_obs: Optional[GetObservationCallable] = None,
         custom_ckpt_obs: Optional[GetObservationCallable] = None,
+        tracer: Optional[SessionTracer] = None,
     ):
         self.name = "PokemonRed"
         self.task_instance = task_instance or DEFAULT_TASK_INSTANCE
@@ -129,6 +133,7 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
             custom_ckpt_obs or PokemonRedObservationCallable()
         )
         self.engine = PokemonRedEngine(self.task_instance)
+        self.tracer = tracer
         # Register tools
         self._press_button_tool = PressButtonTool(self.engine)
@@ -203,6 +208,27 @@ class PokemonRedEnvironment(StatefulEnvironment, ReproducibleEnvironment[Pokemon
                 if tool_result.error and hasattr(pub_state, "error_info"):
                     pub_state.error_info = tool_result.error
+        # Record EnvironmentEvent for tracing if tracer is available
+        if self.tracer and hasattr(priv_state, 'reward_last_step'):
+            # Get state information for the event
+            prev_state = getattr(self.engine, '_previous_state', None)
+            terminated = getattr(priv_state, 'terminated', False)
+            truncated = getattr(priv_state, 'truncated', False)
+            # Convert states to dict for serialization
+            pub_state_dict = pub_state.__dict__ if hasattr(pub_state, '__dict__') else pub_state
+            env_event = EnvironmentEvent(
+                system_instance_id="pokemon_red_env",
+                time_record=TimeRecord(event_time=time.time()),
+                reward=float(priv_state.reward_last_step),
+                terminated=terminated,
+                truncated=truncated,
+                system_state_before=prev_state if prev_state else None,
+                system_state_after=pub_state_dict,
+            )
+            await self.tracer.record_event(env_event)
         return await self._to_observation(
             priv_state, pub_state, self.custom_step_observation_callable
         )

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.14py3-none-any.whl → 0.2.17py3-none-any.whl