synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Pytest tests for server-based map validation
|
|
4
|
+
Tests different game states and saves reference outputs for regression testing
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import requests
|
|
9
|
+
import time
|
|
10
|
+
import subprocess
|
|
11
|
+
import os
|
|
12
|
+
import json
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from tests.test_memory_map import format_map_data
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ServerMapTester:
    """Helper class for testing server-based map reading.

    Launches ``server.app`` as a subprocess on a given port and talks to it
    over HTTP (``/status``, ``/state`` and ``/action`` endpoints).
    """

    def __init__(self, port=8010):
        """Remember the port/URL to use; no server is started yet."""
        self.port = port
        self.server_url = f"http://127.0.0.1:{port}"
        self.server_process = None

    def start_server(self, state_file, max_attempts=30, retry_delay=1.0):
        """Start server with a specific state file.

        Args:
            state_file: Path of the saved state passed to ``--load-state``.
            max_attempts: How many times ``/status`` is polled before giving up.
            retry_delay: Seconds to wait between two polls.

        Returns:
            True once ``/status`` answers with HTTP 200, False if the server
            process exits early or never becomes ready (the process is
            stopped again in that case).
        """
        import sys  # local import: only needed to locate the running interpreter

        self.stop_server()  # Ensure clean state

        server_cmd = [
            # Use the interpreter running the tests so the server sees the
            # same environment (a bare "python" may resolve elsewhere on PATH).
            sys.executable, "-m", "server.app",
            "--load-state", str(state_file),
            "--port", str(self.port),
            "--manual",
        ]

        # The output is never read here; an unread PIPE can block the child
        # once the OS pipe buffer fills up, so discard the output instead.
        self.server_process = subprocess.Popen(
            server_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Wait for server to start
        for _ in range(max_attempts):
            if self.server_process.poll() is not None:
                # Server already exited; further polling cannot succeed.
                break
            try:
                response = requests.get(f"{self.server_url}/status", timeout=2)
            except requests.exceptions.RequestException:
                pass
            else:
                if response.status_code == 200:
                    return True
            # Pause after every unsuccessful attempt (including non-200 replies).
            time.sleep(retry_delay)

        self.stop_server()
        return False

    def stop_server(self):
        """Stop the server process (no-op when none is running).

        Tries ``terminate()`` first and escalates to ``kill()`` if the process
        has not exited within 5 seconds.
        """
        if self.server_process:
            self.server_process.terminate()
            try:
                self.server_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.server_process.kill()
                self.server_process.wait()
            self.server_process = None

    def get_map_data(self):
        """Get current map data from server.

        Returns:
            A dict with 'location', 'position' and 'tiles' taken from the
            ``/state`` response, or None when the server answers with a
            non-200 status. Request or parsing errors fail the current test.
        """
        try:
            response = requests.get(f"{self.server_url}/state", timeout=10)
            if response.status_code == 200:
                state = response.json()
                return {
                    'location': state['player']['location'],
                    'position': state['player']['position'],
                    'tiles': state['map']['tiles'],
                }
        except Exception as e:
            pytest.fail(f"Failed to get map data: {e}")
        return None

    def execute_actions(self, actions):
        """Execute a sequence of actions.

        Each action is POSTed as JSON to ``/action``; a transport error or a
        non-200 response fails the current test.
        """
        for action in actions:
            try:
                response = requests.post(f"{self.server_url}/action", json=action, timeout=5)
            except Exception as e:
                pytest.fail(f"Failed to execute action {action}: {e}")
            # Checked outside the try so the failure is not routed through
            # the generic exception handler above.
            if response.status_code != 200:
                pytest.fail(f"Action failed: {action}, status: {response.status_code}")
            time.sleep(0.3)  # Allow action to process
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@pytest.fixture
def server_tester():
    """Yield a fresh ServerMapTester and stop its server once the test is done."""
    map_tester = ServerMapTester()
    yield map_tester
    # Teardown: make sure no server subprocess outlives the test.
    map_tester.stop_server()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def save_reference_map(location_name, map_data, reference_dir):
    """Save map data as reference for future comparisons.

    Args:
        location_name: Human-readable location; used to derive the file name.
        map_data: Dict with 'location', 'position' and 'tiles' keys.
        reference_dir: Directory (str or Path) the reference file is written to.

    Returns:
        Path of the written ``<location>_reference.json`` file.
    """
    reference_dir = Path(reference_dir)
    # parents=True so missing intermediate directories do not abort the save.
    reference_dir.mkdir(parents=True, exist_ok=True)

    # Clean filename: spaces -> underscores, apostrophes dropped, lower-cased.
    # Path separators are replaced too so the file always lands in reference_dir.
    stem = location_name.replace(' ', '_').replace("'", '').lower()
    stem = stem.replace('/', '_').replace('\\', '_')
    reference_file = reference_dir / f"{stem}_reference.json"

    reference_data = {
        'location': map_data['location'],
        'position': map_data['position'],
        'tiles': map_data['tiles'],
        'formatted_map': format_map_data(map_data['tiles'], map_data['location']),
    }

    with open(reference_file, 'w', encoding='utf-8') as f:
        json.dump(reference_data, f, indent=2)

    return reference_file
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def compare_with_reference(current_map, reference_file):
    """Compare current map with saved reference.

    Args:
        current_map: Dict with at least 'location' and 'tiles' (a list of rows).
        reference_file: Path (or path string) of a JSON reference file that
            contains 'location' and 'tiles'.

    Returns:
        A ``(matches, message)`` tuple. ``matches`` is False when the file is
        missing, the location or grid dimensions differ, or more than 5% of
        the tiles differ; ``message`` describes the outcome.
    """
    reference_file = Path(reference_file)
    if not reference_file.exists():
        return False, f"Reference file {reference_file} does not exist"

    with open(reference_file, 'r', encoding='utf-8') as f:
        reference = json.load(f)

    # Compare location
    if current_map['location'] != reference['location']:
        return False, f"Location mismatch: {current_map['location']} != {reference['location']}"

    # Compare map dimensions
    current_tiles = current_map['tiles']
    reference_tiles = reference['tiles']

    if len(current_tiles) != len(reference_tiles):
        return False, f"Height mismatch: {len(current_tiles)} != {len(reference_tiles)}"

    # Compare tile data (allow some tolerance for minor differences).
    # Widths are checked row by row so ragged grids are reported instead of
    # being silently truncated by zip(); empty grids simply yield 0/0.
    differences = 0
    total_tiles = 0
    for y, (current_row, reference_row) in enumerate(zip(current_tiles, reference_tiles)):
        if len(current_row) != len(reference_row):
            where = "" if y == 0 else f" in row {y}"
            return False, f"Width mismatch{where}: {len(current_row)} != {len(reference_row)}"
        total_tiles += len(current_row)
        differences += sum(
            1 for current_tile, reference_tile in zip(current_row, reference_row)
            if current_tile != reference_tile
        )

    difference_ratio = differences / total_tiles if total_tiles > 0 else 0.0

    # Allow up to 5% differences for minor variations
    if difference_ratio > 0.05:
        return False, f"Too many tile differences: {differences}/{total_tiles} ({difference_ratio:.1%})"

    return True, f"Maps match (differences: {differences}/{total_tiles}, {difference_ratio:.1%})"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TestServerMapValidation:
    """Test server-based map reading for different scenarios.

    The first three tests validate the map returned for a given start state
    and (re)write a reference JSON file; the regression test replays every
    saved reference and compares the live map against it.
    """

    # Relative paths: resolved against the directory pytest is started from.
    REFERENCE_DIR = "tests/map_references"
    HOUSE_STATE = "tests/states/house.state"
    UPSTAIRS_STATE = "tests/states/upstairs.state"

    @staticmethod
    def _leave_house_actions():
        """Return the button presses used to walk from the house to the outside."""
        return [{"buttons": ["down"]} for _ in range(3)]

    def test_house_state_map(self, server_tester):
        """Test map reading from house state"""
        assert server_tester.start_server(self.HOUSE_STATE), "Failed to start server"

        map_data = server_tester.get_map_data()
        assert map_data is not None, "Failed to get map data"

        # Validate basic properties
        assert "BRENDAN" in map_data['location'].upper(), f"Unexpected location: {map_data['location']}"
        assert "HOUSE" in map_data['location'].upper(), f"Not in house: {map_data['location']}"
        assert len(map_data['tiles']) > 0, "Empty map tiles"

        # Save as reference
        reference_file = save_reference_map(map_data['location'], map_data, self.REFERENCE_DIR)
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ House state map validated and saved to {reference_file}")

    def test_upstairs_state_map(self, server_tester):
        """Test map reading from upstairs state"""
        assert server_tester.start_server(self.UPSTAIRS_STATE), "Failed to start server"

        map_data = server_tester.get_map_data()
        assert map_data is not None, "Failed to get map data"

        # Validate upstairs properties
        assert "2F" in map_data['location'] or "UPSTAIRS" in map_data['location'].upper(), f"Not upstairs: {map_data['location']}"

        tiles = map_data['tiles']
        assert len(tiles) >= 10, "Map too small"
        assert len(tiles[0]) >= 10, "Map too narrow"

        # Check for reasonable tile diversity (indoor areas should have various behavior types)
        total_tiles = sum(len(row) for row in tiles)
        behavior_counts = {}
        for row in tiles:
            for tile in row:
                # The second tile element is treated as the behavior id;
                # shorter tiles are ignored.
                if len(tile) >= 2:
                    behavior = tile[1]
                    behavior_counts[behavior] = behavior_counts.get(behavior, 0) + 1

        # Should have at least 3 different behavior types for a proper indoor area
        unique_behaviors = len(behavior_counts)
        assert unique_behaviors >= 3, f"Too few behavior types: {unique_behaviors} (behaviors: {list(behavior_counts.keys())})"

        # Should not be dominated by a single behavior type (>90%)
        max_behavior_count = max(behavior_counts.values()) if behavior_counts else 0
        dominance_ratio = max_behavior_count / total_tiles if total_tiles > 0 else 0
        assert dominance_ratio < 0.9, f"Single behavior dominates: {dominance_ratio:.1%}"

        # Save as reference
        reference_file = save_reference_map(map_data['location'], map_data, self.REFERENCE_DIR)
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ Upstairs state map validated and saved to {reference_file}")

    def test_house_to_outside_transition(self, server_tester):
        """Test area transition from house to outside"""
        assert server_tester.start_server(self.HOUSE_STATE), "Failed to start server"

        # Get initial house map
        house_map = server_tester.get_map_data()
        # Guard first so a missing map is reported as such, not as a TypeError.
        assert house_map is not None, "Failed to get house map"
        assert "HOUSE" in house_map['location'].upper(), f"Not in house: {house_map['location']}"

        # Move outside
        server_tester.execute_actions(self._leave_house_actions())

        # Get outside map
        outside_map = server_tester.get_map_data()
        assert outside_map is not None, "Failed to get outside map"
        assert "TOWN" in outside_map['location'].upper(), f"Not in town: {outside_map['location']}"

        # Validate outside map quality
        tiles = outside_map['tiles']
        total_tiles = sum(len(row) for row in tiles)
        unknown_tiles = sum(1 for row in tiles for tile in row if len(tile) >= 2 and tile[1] == 0)  # UNKNOWN = 0

        unknown_ratio = unknown_tiles / total_tiles if total_tiles > 0 else 0

        # Log the unknown ratio for debugging
        print(f"Outside map unknown ratio: {unknown_ratio:.1%}")

        # If too many unknown tiles, this indicates the area transition bug.
        # This is only reported, not asserted, so the reference is still saved.
        if unknown_ratio > 0.3:
            print(f"⚠️ DETECTED AREA TRANSITION ISSUE: {unknown_ratio:.1%} unknown tiles")
            print("This test demonstrates that the area transition bug still occurs sometimes")
            # For now, save this as a reference anyway to track the issue
        else:
            print(f"✅ Area transition successful: {unknown_ratio:.1%} unknown tiles")

        # Save as reference
        reference_file = save_reference_map(outside_map['location'], outside_map, self.REFERENCE_DIR)
        assert reference_file.exists(), "Failed to save reference file"

        print(f"✅ House-to-outside transition validated and saved to {reference_file}")

    def test_regression_against_references(self, server_tester):
        """Test current maps against saved references"""
        reference_dir = Path(self.REFERENCE_DIR)
        if not reference_dir.exists():
            pytest.skip("No reference files exist yet - run other tests first")

        reference_files = list(reference_dir.glob("*_reference.json"))
        if not reference_files:
            pytest.skip("No reference files found")

        # Test each reference
        for reference_file in reference_files:
            with open(reference_file, 'r') as f:
                reference = json.load(f)

            location = reference['location']
            location_upper = location.upper()

            # Determine which state file to use based on location
            if "BRENDAN" in location_upper and "HOUSE" in location_upper and "2F" not in location:
                state_file = self.HOUSE_STATE
            elif "2F" in location or "UPSTAIRS" in location_upper:
                state_file = self.UPSTAIRS_STATE
            else:
                # For outdoor locations, start from house and transition
                state_file = self.HOUSE_STATE

            assert server_tester.start_server(state_file), f"Failed to start server for {location}"

            # If outdoor location, perform transition
            if "TOWN" in location_upper:
                server_tester.execute_actions(self._leave_house_actions())

            current_map = server_tester.get_map_data()
            assert current_map is not None, f"Failed to get map for {location}"

            # Compare with reference
            matches, message = compare_with_reference(current_map, reference_file)
            assert matches, f"Map regression for {location}: {message}"

            print(f"✅ Regression test passed for {location}: {message}")
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
if __name__ == "__main__":
    # Development convenience: run this file's tests directly, verbosely and
    # with output capturing disabled.
    import sys

    pytest_args = [__file__, "-v", "-s"]
    sys.exit(pytest.main(pytest_args))
|
examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test for torchic state and milestone reading
|
|
4
|
+
|
|
5
|
+
This test verifies that:
|
|
6
|
+
1. The torchic state loads correctly
|
|
7
|
+
2. The state contains the expected data (player on Route 101, has Torchic)
|
|
8
|
+
3. The milestones are correctly detected and include Littleroot Town
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
import subprocess
|
|
13
|
+
import time
|
|
14
|
+
import requests
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
class ServerManager:
    """Manages server startup and shutdown for tests.

    The server is launched as a child process (``python -m server.app``) and
    probed over HTTP at ``BASE_URL``. The child's stdout/stderr are discarded:
    nothing in this helper reads them, and an undrained pipe can block the
    child once the OS pipe buffer fills up.
    """

    # Address the spawned server is probed on.
    BASE_URL = "http://localhost:8000"
    # Upper bound, in seconds, on how long start_server() waits for readiness.
    STARTUP_TIMEOUT = 30.0
    # Pause, in seconds, between two readiness probes.
    POLL_INTERVAL = 0.5

    def __init__(self):
        # Handle of the running server process, or None when nothing is running.
        self.server_process = None

    def start_server(self, state_file):
        """Start the server with a specific state file.

        Args:
            state_file: Path handed to the server's ``--load-state`` option.

        Returns:
            True once ``GET /status`` answers with HTTP 200, False if the
            process cannot be spawned, exits early, or does not become healthy
            within ``STARTUP_TIMEOUT`` seconds. On failure the child process
            is terminated, so nothing is left running.
        """
        # Function-scope import: the module does not import sys at the top.
        import sys

        print(f"🚀 Starting server with state: {state_file}")
        # Use the interpreter running the tests rather than whatever "python"
        # happens to resolve to on PATH.
        cmd = [sys.executable, "-m", "server.app", "--manual", "--load-state", state_file]

        try:
            self.server_process = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (OSError, ValueError, TypeError) as e:
            print(f"❌ Failed to start server: {e}")
            return False

        # Poll for readiness instead of sleeping a fixed amount of time.
        print("⏳ Waiting for server to start...")
        deadline = time.monotonic() + self.STARTUP_TIMEOUT
        last_problem = "no response"
        while time.monotonic() < deadline:
            exit_code = self.server_process.poll()
            if exit_code is not None:
                # The child died; further probing cannot succeed.
                last_problem = f"process exited with code {exit_code}"
                break

            try:
                response = requests.get(f"{self.BASE_URL}/status", timeout=5)
            except requests.RequestException as e:
                # Typically "connection refused" while the server is booting.
                last_problem = str(e)
            else:
                if response.status_code == 200:
                    print("✅ Server started successfully")
                    return True
                last_problem = f"status {response.status_code}"

            time.sleep(self.POLL_INTERVAL)

        print(f"❌ Server not responding: {last_problem}")
        self._terminate_process()
        return False

    def stop_server(self):
        """Stop the server cleanly.

        Asks the server to shut down via ``POST /stop`` (best effort), then
        terminates the child process. Does nothing when no server is running.
        """
        if not self.server_process:
            return

        print("🛑 Stopping server...")
        try:
            # Try graceful shutdown first
            requests.post(f"{self.BASE_URL}/stop", timeout=2)
            time.sleep(1)
        except requests.RequestException:
            # Best effort only: the process is terminated below regardless.
            pass

        self._terminate_process()

    def _terminate_process(self):
        """Terminate the child process, escalating to kill after 5 seconds."""
        process = self.server_process
        if process is None:
            return
        # Drop the handle first so a repeated call becomes a no-op.
        self.server_process = None

        try:
            process.terminate()
            process.wait(timeout=5)
            print("✅ Server stopped gracefully")
        except subprocess.TimeoutExpired:
            print("⚠️ Server didn't stop gracefully, force killing...")
            process.kill()
            process.wait()
            print("✅ Server force killed")
|
|
74
|
+
|
|
75
|
+
@pytest.fixture(scope="session", autouse=True)
def check_environment():
    """Verify that the Torchic save state exists before tests run.

    Calls ``pytest.skip`` when ``tests/states/torchic.state`` is missing. The
    path is relative, so it is resolved against the current working directory.
    """
    state_path = "tests/states/torchic.state"
    if os.path.exists(state_path):
        print(f"✅ Found torchic state file: {state_path}")
    else:
        pytest.skip(f"Torchic state file not found: {state_path}")
|
|
83
|
+
|
|
84
|
+
def test_torchic_state_loading():
    """Load the Torchic save state and validate the ``/state`` payload.

    Checks that the payload has its top-level sections, that the player is
    located on Route 101, and that the first party member is a Torchic
    carrying level, HP and move data. The server is always stopped afterwards.
    """
    manager = ServerManager()

    try:
        # Boot the server from the Torchic save state.
        started = manager.start_server("tests/states/torchic.state")
        assert started, "Failed to start server"

        # Fetch the comprehensive state.
        resp = requests.get("http://localhost:8000/state", timeout=10)
        assert resp.status_code == 200, f"Failed to get state: {resp.status_code}"
        payload = resp.json()

        # Top-level structure.
        for section, complaint in (
            ("player", "State missing player data"),
            ("game", "State missing game data"),
            ("visual", "State missing visual data"),
        ):
            assert section in payload, complaint

        # Player section.
        trainer = payload["player"]
        for field, complaint in (
            ("name", "Player data missing name"),
            ("location", "Player data missing location"),
            ("position", "Player data missing position"),
            ("party", "Player data missing party"),
        ):
            assert field in trainer, complaint

        # The Torchic save state is expected to place the player on Route 101.
        where = trainer["location"]
        print(f"📍 Player location: {where}")
        assert "ROUTE 101" in where.upper(), f"Expected player to be in Route 101, but found: {where}"

        # Party must be a non-empty list.
        team = trainer["party"]
        assert isinstance(team, list), "Party should be a list"
        assert team, "Party should not be empty"

        # The lead Pokemon must be Torchic.
        lead = team[0]
        assert "species_name" in lead, "Pokemon missing species_name"
        species = lead["species_name"]
        print(f"🔥 First Pokemon: {species}")
        assert species.upper() == "TORCHIC", f"Expected Torchic, but found: {species}"

        # Per-Pokemon data structure.
        for field, complaint in (
            ("level", "Pokemon missing level"),
            ("current_hp", "Pokemon missing current_hp"),
            ("max_hp", "Pokemon missing max_hp"),
            ("moves", "Pokemon missing moves"),
        ):
            assert field in lead, complaint

        print(f"✅ Torchic level: {lead['level']}")
        print(f"✅ Torchic HP: {lead['current_hp']}/{lead['max_hp']}")
        print(f"✅ Torchic moves: {lead['moves']}")

    finally:
        manager.stop_server()
|
|
139
|
+
|
|
140
|
+
def test_torchic_milestones():
    """Load the Torchic save state and validate the ``/milestones`` payload.

    Checks the payload structure, that a Littleroot milestone is listed, that
    the reported location is Route 101, and that a fixed set of milestones is
    completed while another fixed set is not. The server is always stopped
    afterwards.
    """
    manager = ServerManager()

    try:
        # Boot the server from the Torchic save state.
        started = manager.start_server("tests/states/torchic.state")
        assert started, "Failed to start server"

        # Fetch the milestones.
        resp = requests.get("http://localhost:8000/milestones", timeout=10)
        assert resp.status_code == 200, f"Failed to get milestones: {resp.status_code}"
        payload = resp.json()

        # Payload structure.
        for key, complaint in (
            ("milestones", "Milestones data missing milestones list"),
            ("completed", "Milestones data missing completed count"),
            ("total", "Milestones data missing total count"),
            ("progress", "Milestones data missing progress"),
            ("current_location", "Milestones data missing current_location"),
        ):
            assert key in payload, complaint

        milestones = payload["milestones"]
        completed = payload["completed"]
        total = payload["total"]
        progress = payload["progress"]
        current_location = payload["current_location"]

        print(f"📊 Milestones progress: {completed}/{total} ({progress:.1%})")
        print(f"📍 Current location: {current_location}")

        # A Littleroot milestone must be listed; whether it is completed is
        # checked separately below.
        littleroot = next(
            (entry for entry in milestones if "LITTLEROOT" in entry["name"].upper()),
            None,
        )
        assert littleroot is not None, "Littleroot Town milestone not found"
        print(f"🏘️ Littleroot milestone: {littleroot}")

        # The reported location must be Route 101.
        assert "ROUTE 101" in current_location.upper(), f"Current location should be Route 101, but found: {current_location}"

        def find_exact(name):
            # First milestone whose name equals `name`, or None if absent.
            return next((entry for entry in milestones if entry["name"] == name), None)

        # Milestones that must already be completed in this save state.
        expected_done = ["GAME_RUNNING", "HAS_PARTY", "STARTER_CHOSEN", "TORCHIC_OBTAINED", "ROUTE_101_VISITED"]
        for milestone_name in expected_done:
            entry = find_exact(milestone_name)
            assert entry is not None, f"Basic milestone {milestone_name} not found"
            # Equality check: a truthy non-boolean such as a string does not pass.
            assert entry["completed"] == True, f"Basic milestone {milestone_name} should be completed"
            print(f"✅ {milestone_name}: Completed")

        # Milestones that must still be open in this save state.
        expected_open = ["STONE_BADGE", "POKEDEX_RECEIVED", "FIRST_WILD_ENCOUNTER", "LITTLEROOT_TOWN"]
        for milestone_name in expected_open:
            entry = find_exact(milestone_name)
            assert entry is not None, f"Milestone {milestone_name} not found"
            # Equality check: a falsy non-boolean such as None does not pass.
            assert entry["completed"] == False, f"Milestone {milestone_name} should not be completed yet"
            print(f"⏳ {milestone_name}: Not completed yet")

    finally:
        manager.stop_server()
|
|
201
|
+
|
|
202
|
+
def test_torchic_state_summary():
    """Load the Torchic save state and validate the game and visual sections.

    Checks the ``game`` section of ``/state`` (money, battle flag, badges) and
    the ``visual`` section (resolution and a non-empty base64 screenshot).
    The server is always stopped afterwards.
    """
    manager = ServerManager()

    try:
        # Boot the server from the Torchic save state.
        started = manager.start_server("tests/states/torchic.state")
        assert started, "Failed to start server"

        # Fetch the comprehensive state.
        resp = requests.get("http://localhost:8000/state", timeout=10)
        assert resp.status_code == 200, f"Failed to get state: {resp.status_code}"
        payload = resp.json()

        # Game section structure.
        game = payload["game"]
        for field, complaint in (
            ("money", "Game data missing money"),
            ("game_state", "Game data missing game_state"),
            ("is_in_battle", "Game data missing is_in_battle"),
            ("badges", "Game data missing badges"),
            ("dialog_text", "Game data missing dialog_text"),
        ):
            assert field in game, complaint

        # Money must not be negative.
        money = game["money"]
        print(f"💰 Player money: {money}")
        assert money >= 0, "Player should have non-negative money"

        # The save state must not be mid-battle.
        is_in_battle = game["is_in_battle"]
        print(f"⚔️ In battle: {is_in_battle}")
        # Equality check: a falsy non-boolean such as None does not pass.
        assert is_in_battle == False, "Player should not be in battle at start"

        # No badges are expected yet.
        badges = game["badges"]
        print(f"🏆 Badges: {badges}")
        assert len(badges) == 0, "Player should have no badges at start"

        # Visual section structure.
        visual = payload["visual"]
        for field, complaint in (
            ("screenshot_base64", "Visual data missing screenshot"),
            ("resolution", "Visual data missing resolution"),
        ):
            assert field in visual, complaint

        resolution = visual["resolution"]
        print(f"📺 Resolution: {resolution}")
        assert resolution == [240, 160], f"Expected resolution [240, 160], got {resolution}"

        # A screenshot must be present.
        screenshot = visual["screenshot_base64"]
        shot_length = len(screenshot)
        assert shot_length > 0, "Screenshot should not be empty"
        print(f"📸 Screenshot size: {shot_length} characters")

        print("✅ Torchic state test completed successfully")

    finally:
        manager.stop_server()
|
|
257
|
+
|
|
258
|
+
if __name__ == "__main__":
    # Manual entry point: run this file through pytest and exit with pytest's
    # status code, so a failing run is visible to the calling shell or CI
    # (previously the result was discarded and the script always exited 0).
    raise SystemExit(pytest.main([__file__, "-v"]))
|