synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
1
|
"""Main SessionTracer class for tracing v3."""
|
|
3
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
4
5
|
import asyncio
|
|
6
|
+
import json
|
|
5
7
|
from contextlib import asynccontextmanager
|
|
6
|
-
from datetime import datetime
|
|
8
|
+
from datetime import UTC, datetime
|
|
7
9
|
from typing import Any
|
|
8
10
|
|
|
9
11
|
from .abstractions import (
|
|
10
12
|
BaseEvent,
|
|
11
13
|
SessionEventMarkovBlanketMessage,
|
|
14
|
+
SessionMessageContent,
|
|
12
15
|
SessionTimeStep,
|
|
13
16
|
SessionTrace,
|
|
14
17
|
TimeRecord,
|
|
@@ -16,7 +19,9 @@ from .abstractions import (
|
|
|
16
19
|
from .config import CONFIG
|
|
17
20
|
from .decorators import set_session_id, set_session_tracer, set_turn_number
|
|
18
21
|
from .hooks import GLOBAL_HOOKS, HookManager
|
|
19
|
-
from .
|
|
22
|
+
from .storage.base import TraceStorage
|
|
23
|
+
from .storage.config import StorageConfig
|
|
24
|
+
from .storage.factory import create_storage
|
|
20
25
|
from .utils import generate_session_id
|
|
21
26
|
|
|
22
27
|
|
|
@@ -28,6 +33,8 @@ class SessionTracer:
|
|
|
28
33
|
hooks: HookManager | None = None,
|
|
29
34
|
db_url: str | None = None,
|
|
30
35
|
auto_save: bool = True,
|
|
36
|
+
storage: TraceStorage | None = None,
|
|
37
|
+
storage_config: StorageConfig | None = None,
|
|
31
38
|
):
|
|
32
39
|
"""Initialize session tracer.
|
|
33
40
|
|
|
@@ -40,7 +47,8 @@ class SessionTracer:
|
|
|
40
47
|
self._current_trace: SessionTrace | None = None
|
|
41
48
|
self._lock = asyncio.Lock()
|
|
42
49
|
self.db_url = db_url or CONFIG.db_url
|
|
43
|
-
self.
|
|
50
|
+
self._storage_config = storage_config
|
|
51
|
+
self.db: TraceStorage | None = storage
|
|
44
52
|
self.auto_save = auto_save
|
|
45
53
|
self._current_step: SessionTimeStep | None = None
|
|
46
54
|
|
|
@@ -57,7 +65,8 @@ class SessionTracer:
|
|
|
57
65
|
async def initialize(self):
|
|
58
66
|
"""Initialize the database connection."""
|
|
59
67
|
if self.db is None:
|
|
60
|
-
self.
|
|
68
|
+
config = self._storage_config or StorageConfig(connection_string=self.db_url)
|
|
69
|
+
self.db = create_storage(config)
|
|
61
70
|
await self.db.initialize()
|
|
62
71
|
|
|
63
72
|
async def start_session(
|
|
@@ -97,7 +106,7 @@ class SessionTracer:
|
|
|
97
106
|
|
|
98
107
|
self._current_trace = SessionTrace(
|
|
99
108
|
session_id=session_id,
|
|
100
|
-
created_at=datetime.
|
|
109
|
+
created_at=datetime.now(UTC),
|
|
101
110
|
session_time_steps=[],
|
|
102
111
|
event_history=[],
|
|
103
112
|
markov_blanket_message_history=[],
|
|
@@ -110,7 +119,9 @@ class SessionTracer:
|
|
|
110
119
|
|
|
111
120
|
# Ensure session row exists for incremental writes
|
|
112
121
|
if self.db:
|
|
113
|
-
await self.db.ensure_session(
|
|
122
|
+
await self.db.ensure_session(
|
|
123
|
+
session_id, created_at=self._current_trace.created_at, metadata=metadata or {}
|
|
124
|
+
)
|
|
114
125
|
|
|
115
126
|
# Trigger hooks
|
|
116
127
|
await self.hooks.trigger(
|
|
@@ -141,7 +152,7 @@ class SessionTracer:
|
|
|
141
152
|
step = SessionTimeStep(
|
|
142
153
|
step_id=step_id,
|
|
143
154
|
step_index=len(self._current_trace.session_time_steps),
|
|
144
|
-
timestamp=datetime.
|
|
155
|
+
timestamp=datetime.now(UTC),
|
|
145
156
|
turn_number=turn_number,
|
|
146
157
|
step_metadata=metadata or {},
|
|
147
158
|
)
|
|
@@ -186,7 +197,7 @@ class SessionTracer:
|
|
|
186
197
|
step = self._current_step
|
|
187
198
|
|
|
188
199
|
if step and step.completed_at is None:
|
|
189
|
-
step.completed_at = datetime.
|
|
200
|
+
step.completed_at = datetime.now(UTC)
|
|
190
201
|
|
|
191
202
|
# Trigger hooks
|
|
192
203
|
await self.hooks.trigger(
|
|
@@ -234,7 +245,7 @@ class SessionTracer:
|
|
|
234
245
|
event_id = await self.db.insert_event_row(
|
|
235
246
|
self._current_trace.session_id,
|
|
236
247
|
timestep_db_id=timestep_db_id,
|
|
237
|
-
event=event,
|
|
248
|
+
event=event, # type: ignore[arg-type]
|
|
238
249
|
)
|
|
239
250
|
# Auto-insert an event reward if EnvironmentEvent carries reward
|
|
240
251
|
try:
|
|
@@ -259,7 +270,7 @@ class SessionTracer:
|
|
|
259
270
|
|
|
260
271
|
async def record_message(
|
|
261
272
|
self,
|
|
262
|
-
content:
|
|
273
|
+
content: Any,
|
|
263
274
|
message_type: str,
|
|
264
275
|
event_time: float | None = None,
|
|
265
276
|
message_time: int | None = None,
|
|
@@ -277,11 +288,13 @@ class SessionTracer:
|
|
|
277
288
|
if self._current_trace is None:
|
|
278
289
|
raise RuntimeError("No active session")
|
|
279
290
|
|
|
291
|
+
normalised_content, content_str = self._normalise_message_content(content)
|
|
292
|
+
|
|
280
293
|
msg = SessionEventMarkovBlanketMessage(
|
|
281
|
-
content=
|
|
294
|
+
content=normalised_content,
|
|
282
295
|
message_type=message_type,
|
|
283
296
|
time_record=TimeRecord(
|
|
284
|
-
event_time=event_time or datetime.
|
|
297
|
+
event_time=event_time or datetime.now(UTC).timestamp(), message_time=message_time
|
|
285
298
|
),
|
|
286
299
|
metadata=metadata or {},
|
|
287
300
|
)
|
|
@@ -315,7 +328,7 @@ class SessionTracer:
|
|
|
315
328
|
self._current_trace.session_id,
|
|
316
329
|
timestep_db_id=timestep_db_id,
|
|
317
330
|
message_type=message_type,
|
|
318
|
-
content=
|
|
331
|
+
content=content_str,
|
|
319
332
|
event_time=msg.time_record.event_time,
|
|
320
333
|
message_time=msg.time_record.message_time,
|
|
321
334
|
metadata=msg.metadata,
|
|
@@ -323,7 +336,23 @@ class SessionTracer:
|
|
|
323
336
|
return message_id
|
|
324
337
|
return None
|
|
325
338
|
|
|
326
|
-
|
|
339
|
+
@staticmethod
|
|
340
|
+
def _normalise_message_content(content: Any) -> tuple[SessionMessageContent, str]:
|
|
341
|
+
if isinstance(content, SessionMessageContent):
|
|
342
|
+
return content, content.as_text()
|
|
343
|
+
if isinstance(content, str):
|
|
344
|
+
payload = SessionMessageContent(text=content)
|
|
345
|
+
return payload, payload.as_text()
|
|
346
|
+
try:
|
|
347
|
+
serialized = json.dumps(content, ensure_ascii=False)
|
|
348
|
+
payload = SessionMessageContent(json_payload=serialized)
|
|
349
|
+
return payload, serialized
|
|
350
|
+
except (TypeError, ValueError):
|
|
351
|
+
text = str(content)
|
|
352
|
+
payload = SessionMessageContent(text=text)
|
|
353
|
+
return payload, text
|
|
354
|
+
|
|
355
|
+
async def end_session(self, save: bool | None = None) -> SessionTrace:
|
|
327
356
|
"""End the current session.
|
|
328
357
|
|
|
329
358
|
Args:
|
|
@@ -339,18 +368,28 @@ class SessionTracer:
|
|
|
339
368
|
# End any open timesteps
|
|
340
369
|
for step in self._current_trace.session_time_steps:
|
|
341
370
|
if step.completed_at is None:
|
|
342
|
-
step.completed_at = datetime.
|
|
371
|
+
step.completed_at = datetime.now(UTC)
|
|
343
372
|
|
|
344
373
|
# Trigger pre-save hooks
|
|
345
374
|
await self.hooks.trigger("before_save", session=self._current_trace)
|
|
346
375
|
|
|
347
376
|
# Save if requested
|
|
348
377
|
should_save = save if save is not None else self.auto_save
|
|
378
|
+
|
|
379
|
+
# Debug logging
|
|
380
|
+
import logging
|
|
381
|
+
_logger = logging.getLogger(__name__)
|
|
382
|
+
_logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
|
|
383
|
+
|
|
349
384
|
if should_save and self.db:
|
|
385
|
+
_logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
|
|
350
386
|
await self.db.insert_session_trace(self._current_trace)
|
|
387
|
+
_logger.info("[TRACE_DEBUG] insert_session_trace completed")
|
|
351
388
|
|
|
352
389
|
# Trigger post-save hooks
|
|
353
390
|
await self.hooks.trigger("after_save", session=self._current_trace)
|
|
391
|
+
else:
|
|
392
|
+
_logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
|
|
354
393
|
|
|
355
394
|
# Trigger session end hooks
|
|
356
395
|
await self.hooks.trigger("session_end", session=self._current_trace)
|
|
@@ -370,7 +409,7 @@ class SessionTracer:
|
|
|
370
409
|
self,
|
|
371
410
|
session_id: str | None = None,
|
|
372
411
|
metadata: dict[str, Any] | None = None,
|
|
373
|
-
save: bool = None,
|
|
412
|
+
save: bool | None = None,
|
|
374
413
|
):
|
|
375
414
|
"""Context manager for a session.
|
|
376
415
|
|
|
@@ -414,8 +453,16 @@ class SessionTracer:
|
|
|
414
453
|
if limit:
|
|
415
454
|
query += f" LIMIT {limit}"
|
|
416
455
|
|
|
417
|
-
|
|
418
|
-
|
|
456
|
+
# Ensure DB initialized before querying
|
|
457
|
+
if self.db is None:
|
|
458
|
+
await self.initialize()
|
|
459
|
+
df_or_records = await self.db.query_traces(query) # type: ignore[union-attr]
|
|
460
|
+
try:
|
|
461
|
+
# If pandas DataFrame
|
|
462
|
+
return df_or_records.to_dict("records") # type: ignore[call-arg, attr-defined]
|
|
463
|
+
except AttributeError:
|
|
464
|
+
# Already list of dicts
|
|
465
|
+
return df_or_records
|
|
419
466
|
|
|
420
467
|
async def close(self):
|
|
421
468
|
"""Close database connections."""
|
|
@@ -427,7 +474,14 @@ class SessionTracer:
|
|
|
427
474
|
# Reward recording helpers
|
|
428
475
|
# -------------------------------
|
|
429
476
|
|
|
430
|
-
async def record_outcome_reward(
|
|
477
|
+
async def record_outcome_reward(
|
|
478
|
+
self,
|
|
479
|
+
*,
|
|
480
|
+
total_reward: int,
|
|
481
|
+
achievements_count: int,
|
|
482
|
+
total_steps: int,
|
|
483
|
+
reward_metadata: dict[str, Any] | None = None,
|
|
484
|
+
) -> int | None:
|
|
431
485
|
"""Record an episode-level outcome reward for the current session."""
|
|
432
486
|
if self._current_trace is None:
|
|
433
487
|
raise RuntimeError("No active session")
|
|
@@ -454,7 +508,18 @@ class SessionTracer:
|
|
|
454
508
|
|
|
455
509
|
# StepMetrics removed in favor of event_rewards; use record_event_reward for per-turn shaped values
|
|
456
510
|
|
|
457
|
-
async def record_event_reward(
|
|
511
|
+
async def record_event_reward(
|
|
512
|
+
self,
|
|
513
|
+
*,
|
|
514
|
+
event_id: int,
|
|
515
|
+
message_id: int | None = None,
|
|
516
|
+
turn_number: int | None = None,
|
|
517
|
+
reward_value: float = 0.0,
|
|
518
|
+
reward_type: str | None = None,
|
|
519
|
+
key: str | None = None,
|
|
520
|
+
annotation: dict[str, Any] | None = None,
|
|
521
|
+
source: str | None = None,
|
|
522
|
+
) -> int | None:
|
|
458
523
|
"""Record a first-class event-level reward with optional annotations."""
|
|
459
524
|
if self._current_trace is None:
|
|
460
525
|
raise RuntimeError("No active session")
|
|
@@ -4,8 +4,6 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
|
-
import pandas as pd
|
|
8
|
-
|
|
9
7
|
from ..abstractions import SessionTrace
|
|
10
8
|
|
|
11
9
|
|
|
@@ -42,22 +40,25 @@ class TraceStorage(ABC):
|
|
|
42
40
|
pass
|
|
43
41
|
|
|
44
42
|
@abstractmethod
|
|
45
|
-
async def query_traces(self, query: str, params: dict[str, Any] = None) ->
|
|
46
|
-
"""Execute a query and return results
|
|
43
|
+
async def query_traces(self, query: str, params: dict[str, Any] | None = None) -> Any:
|
|
44
|
+
"""Execute a query and return results.
|
|
47
45
|
|
|
48
46
|
Args:
|
|
49
47
|
query: The SQL query to execute
|
|
50
48
|
params: Optional query parameters
|
|
51
49
|
|
|
52
50
|
Returns:
|
|
53
|
-
Query results as a DataFrame
|
|
51
|
+
Query results as a DataFrame-like object or list of dict records
|
|
54
52
|
"""
|
|
55
53
|
pass
|
|
56
54
|
|
|
57
55
|
@abstractmethod
|
|
58
56
|
async def get_model_usage(
|
|
59
|
-
self,
|
|
60
|
-
|
|
57
|
+
self,
|
|
58
|
+
start_date: datetime | None = None,
|
|
59
|
+
end_date: datetime | None = None,
|
|
60
|
+
model_name: str | None = None,
|
|
61
|
+
) -> Any:
|
|
61
62
|
"""Get model usage statistics.
|
|
62
63
|
|
|
63
64
|
Args:
|
|
@@ -66,7 +67,7 @@ class TraceStorage(ABC):
|
|
|
66
67
|
model_name: Optional model name filter
|
|
67
68
|
|
|
68
69
|
Returns:
|
|
69
|
-
Model usage statistics as a DataFrame
|
|
70
|
+
Model usage statistics as a DataFrame-like object or list of dict records
|
|
70
71
|
"""
|
|
71
72
|
pass
|
|
72
73
|
|
|
@@ -87,13 +88,98 @@ class TraceStorage(ABC):
|
|
|
87
88
|
"""Close the storage connection."""
|
|
88
89
|
pass
|
|
89
90
|
|
|
91
|
+
# Incremental helpers -------------------------------------------------
|
|
92
|
+
|
|
93
|
+
@abstractmethod
|
|
94
|
+
async def ensure_session(
|
|
95
|
+
self,
|
|
96
|
+
session_id: str,
|
|
97
|
+
*,
|
|
98
|
+
created_at: datetime | None = None,
|
|
99
|
+
metadata: dict[str, Any] | None = None,
|
|
100
|
+
) -> None:
|
|
101
|
+
"""Ensure a session row exists for the given session id."""
|
|
102
|
+
pass
|
|
103
|
+
|
|
104
|
+
@abstractmethod
|
|
105
|
+
async def ensure_timestep(
|
|
106
|
+
self,
|
|
107
|
+
session_id: str,
|
|
108
|
+
*,
|
|
109
|
+
step_id: str,
|
|
110
|
+
step_index: int,
|
|
111
|
+
turn_number: int | None = None,
|
|
112
|
+
started_at: datetime | None = None,
|
|
113
|
+
completed_at: datetime | None = None,
|
|
114
|
+
metadata: dict[str, Any] | None = None,
|
|
115
|
+
) -> int:
|
|
116
|
+
"""Ensure a timestep row exists and return its database id."""
|
|
117
|
+
pass
|
|
118
|
+
|
|
119
|
+
@abstractmethod
|
|
120
|
+
async def insert_event_row(
|
|
121
|
+
self,
|
|
122
|
+
session_id: str,
|
|
123
|
+
*,
|
|
124
|
+
timestep_db_id: int | None,
|
|
125
|
+
event: Any,
|
|
126
|
+
metadata_override: dict[str, Any] | None = None,
|
|
127
|
+
) -> int:
|
|
128
|
+
"""Insert an event and return its database id."""
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
@abstractmethod
|
|
132
|
+
async def insert_message_row(
|
|
133
|
+
self,
|
|
134
|
+
session_id: str,
|
|
135
|
+
*,
|
|
136
|
+
timestep_db_id: int | None,
|
|
137
|
+
message_type: str,
|
|
138
|
+
content: Any,
|
|
139
|
+
event_time: float | None = None,
|
|
140
|
+
message_time: int | None = None,
|
|
141
|
+
metadata: dict[str, Any] | None = None,
|
|
142
|
+
) -> int:
|
|
143
|
+
"""Insert a message row linked to a session/timestep."""
|
|
144
|
+
pass
|
|
145
|
+
|
|
146
|
+
@abstractmethod
|
|
147
|
+
async def insert_outcome_reward(
|
|
148
|
+
self,
|
|
149
|
+
session_id: str,
|
|
150
|
+
*,
|
|
151
|
+
total_reward: int,
|
|
152
|
+
achievements_count: int,
|
|
153
|
+
total_steps: int,
|
|
154
|
+
reward_metadata: dict | None = None,
|
|
155
|
+
) -> int:
|
|
156
|
+
"""Record an outcome reward for a session."""
|
|
157
|
+
pass
|
|
158
|
+
|
|
159
|
+
@abstractmethod
|
|
160
|
+
async def insert_event_reward(
|
|
161
|
+
self,
|
|
162
|
+
session_id: str,
|
|
163
|
+
*,
|
|
164
|
+
event_id: int,
|
|
165
|
+
message_id: int | None = None,
|
|
166
|
+
turn_number: int | None = None,
|
|
167
|
+
reward_value: float = 0.0,
|
|
168
|
+
reward_type: str | None = None,
|
|
169
|
+
key: str | None = None,
|
|
170
|
+
annotation: dict[str, Any] | None = None,
|
|
171
|
+
source: str | None = None,
|
|
172
|
+
) -> int:
|
|
173
|
+
"""Record a reward tied to a specific event."""
|
|
174
|
+
pass
|
|
175
|
+
|
|
90
176
|
# Optional experiment management methods
|
|
91
177
|
async def create_experiment(
|
|
92
178
|
self,
|
|
93
179
|
experiment_id: str,
|
|
94
180
|
name: str,
|
|
95
|
-
description: str = None,
|
|
96
|
-
configuration: dict[str, Any] = None,
|
|
181
|
+
description: str | None = None,
|
|
182
|
+
configuration: dict[str, Any] | None = None,
|
|
97
183
|
) -> str:
|
|
98
184
|
"""Create a new experiment."""
|
|
99
185
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
@@ -103,14 +189,14 @@ class TraceStorage(ABC):
|
|
|
103
189
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
104
190
|
|
|
105
191
|
async def get_sessions_by_experiment(
|
|
106
|
-
self, experiment_id: str, limit: int = None
|
|
192
|
+
self, experiment_id: str, limit: int | None = None
|
|
107
193
|
) -> list[dict[str, Any]]:
|
|
108
194
|
"""Get all sessions for an experiment."""
|
|
109
195
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
110
196
|
|
|
111
197
|
# Batch operations
|
|
112
198
|
async def batch_insert_sessions(
|
|
113
|
-
self, traces: list[SessionTrace], batch_size: int = 1000
|
|
199
|
+
self, traces: list[SessionTrace], batch_size: int | None = 1000
|
|
114
200
|
) -> list[str]:
|
|
115
201
|
"""Batch insert multiple session traces.
|
|
116
202
|
|
|
@@ -1,29 +1,34 @@
|
|
|
1
1
|
"""Storage configuration for tracing v3."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
-
from dataclasses import dataclass
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from ..config import resolve_trace_db_auth_token, resolve_trace_db_settings
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class StorageBackend(str, Enum):
|
|
10
12
|
"""Supported storage backends."""
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
TURSO_NATIVE = "turso_native"
|
|
13
15
|
SQLITE = "sqlite"
|
|
14
16
|
POSTGRES = "postgres" # Future support
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
def _is_enabled(value: str | None) -> bool:
|
|
20
|
+
if value is None:
|
|
21
|
+
return False
|
|
22
|
+
return value.lower() in {"1", "true", "yes", "on"}
|
|
23
|
+
|
|
24
|
+
|
|
17
25
|
@dataclass
|
|
18
26
|
class StorageConfig:
|
|
19
27
|
"""Configuration for storage backend."""
|
|
20
28
|
|
|
21
|
-
backend: StorageBackend = StorageBackend.TURSO
|
|
22
29
|
connection_string: str | None = None
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
|
|
26
|
-
turso_auth_token: str = os.getenv("TURSO_AUTH_TOKEN", "")
|
|
30
|
+
backend: StorageBackend | None = None
|
|
31
|
+
turso_auth_token: str | None = field(default=None)
|
|
27
32
|
|
|
28
33
|
# Common settings
|
|
29
34
|
pool_size: int = int(os.getenv("STORAGE_POOL_SIZE", "8"))
|
|
@@ -34,23 +39,65 @@ class StorageConfig:
|
|
|
34
39
|
enable_compression: bool = os.getenv("STORAGE_COMPRESSION", "false").lower() == "true"
|
|
35
40
|
max_content_length: int = int(os.getenv("STORAGE_MAX_CONTENT_LENGTH", "1000000")) # 1MB
|
|
36
41
|
|
|
42
|
+
def __post_init__(self):
|
|
43
|
+
# Allow legacy override while keeping compatibility with existing TURSO_NATIVE env flag
|
|
44
|
+
native_env = os.getenv("TURSO_NATIVE")
|
|
45
|
+
native_flag = _is_enabled(native_env) if native_env is not None else None
|
|
46
|
+
resolved_url: str | None = self.connection_string
|
|
47
|
+
resolved_token: str | None = self.turso_auth_token
|
|
48
|
+
|
|
49
|
+
if resolved_url is None:
|
|
50
|
+
resolved_url, inferred_token = resolve_trace_db_settings()
|
|
51
|
+
self.connection_string = resolved_url
|
|
52
|
+
resolved_token = inferred_token
|
|
53
|
+
|
|
54
|
+
if resolved_token is None:
|
|
55
|
+
resolved_token = resolve_trace_db_auth_token()
|
|
56
|
+
|
|
57
|
+
self.turso_auth_token = resolved_token or ""
|
|
58
|
+
|
|
59
|
+
if self.backend is None:
|
|
60
|
+
self.backend = self._infer_backend(self.connection_string or "")
|
|
61
|
+
|
|
62
|
+
if native_flag is False:
|
|
63
|
+
raise RuntimeError("TURSO_NATIVE=false is no longer supported; only Turso/libSQL backend is available.")
|
|
64
|
+
|
|
65
|
+
# Allow both TURSO_NATIVE and SQLITE backends (both use libsql.connect)
|
|
66
|
+
if self.backend not in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
|
|
67
|
+
raise RuntimeError(f"Unsupported backend: {self.backend}. Only Turso/libSQL and SQLite are supported.")
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def _infer_backend(connection_string: str) -> StorageBackend:
|
|
71
|
+
"""Infer backend type from the connection string."""
|
|
72
|
+
scheme = connection_string.split(":", 1)[0].lower()
|
|
73
|
+
|
|
74
|
+
# Plain SQLite files: file://, /absolute/path, or no scheme
|
|
75
|
+
if (
|
|
76
|
+
scheme == "file"
|
|
77
|
+
or scheme.startswith("sqlite")
|
|
78
|
+
or connection_string.startswith("/")
|
|
79
|
+
or "://" not in connection_string
|
|
80
|
+
):
|
|
81
|
+
return StorageBackend.SQLITE
|
|
82
|
+
|
|
83
|
+
# Turso/sqld: libsql://, http://, https://
|
|
84
|
+
if scheme.startswith("libsql") or "libsql" in scheme or scheme in ("http", "https"):
|
|
85
|
+
return StorageBackend.TURSO_NATIVE
|
|
86
|
+
|
|
87
|
+
raise RuntimeError(f"Unsupported tracing backend scheme: {scheme}")
|
|
88
|
+
|
|
37
89
|
def get_connection_string(self) -> str:
|
|
38
90
|
"""Get the appropriate connection string for the backend."""
|
|
39
91
|
if self.connection_string:
|
|
40
92
|
return self.connection_string
|
|
41
93
|
|
|
42
|
-
if self.backend == StorageBackend.
|
|
43
|
-
return self.
|
|
44
|
-
|
|
45
|
-
return "sqlite+aiosqlite:///traces.db"
|
|
46
|
-
elif self.backend == StorageBackend.POSTGRES:
|
|
47
|
-
return os.getenv("POSTGRES_URL", "postgresql+asyncpg://localhost/traces")
|
|
48
|
-
else:
|
|
49
|
-
raise ValueError(f"Unknown backend: {self.backend}")
|
|
94
|
+
if self.backend == StorageBackend.TURSO_NATIVE:
|
|
95
|
+
return self.connection_string or ""
|
|
96
|
+
raise ValueError(f"Unsupported backend: {self.backend}")
|
|
50
97
|
|
|
51
98
|
def get_backend_config(self) -> dict[str, Any]:
|
|
52
99
|
"""Get backend-specific configuration."""
|
|
53
|
-
if self.backend == StorageBackend.
|
|
100
|
+
if self.backend == StorageBackend.TURSO_NATIVE:
|
|
54
101
|
config = {}
|
|
55
102
|
if self.turso_auth_token:
|
|
56
103
|
config["auth_token"] = self.turso_auth_token
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Factory for creating storage instances."""
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
from ..turso.manager import AsyncSQLTraceManager
|
|
3
|
+
from ..turso.native_manager import NativeLibsqlTraceManager
|
|
5
4
|
from .base import TraceStorage
|
|
6
5
|
from .config import StorageBackend, StorageConfig
|
|
7
6
|
|
|
@@ -23,13 +22,16 @@ def create_storage(config: StorageConfig | None = None) -> TraceStorage:
|
|
|
23
22
|
|
|
24
23
|
config = STORAGE_CONFIG
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
return
|
|
25
|
+
connection_string = config.get_connection_string()
|
|
26
|
+
|
|
27
|
+
# Both TURSO_NATIVE and SQLITE use NativeLibsqlTraceManager
|
|
28
|
+
# because libsql.connect() handles both remote and local file databases
|
|
29
|
+
if config.backend in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
|
|
30
|
+
backend_config = config.get_backend_config()
|
|
31
|
+
return NativeLibsqlTraceManager(
|
|
32
|
+
db_url=connection_string,
|
|
33
|
+
auth_token=backend_config.get("auth_token"),
|
|
34
|
+
)
|
|
33
35
|
elif config.backend == StorageBackend.POSTGRES:
|
|
34
36
|
# Future: PostgreSQL implementation
|
|
35
37
|
raise NotImplementedError("PostgreSQL backend not yet implemented")
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import functools
|
|
5
5
|
import time
|
|
6
|
-
from collections.abc import Callable
|
|
7
|
-
from typing import Any, TypeVar
|
|
6
|
+
from collections.abc import Awaitable, Callable
|
|
7
|
+
from typing import Any, TypeVar, cast
|
|
8
8
|
|
|
9
9
|
T = TypeVar("T")
|
|
10
10
|
|
|
@@ -18,10 +18,10 @@ def retry_async(max_attempts: int = 3, delay: float = 1.0, backoff: float = 2.0)
|
|
|
18
18
|
backoff: Backoff multiplier for each retry
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
def decorator(func: Callable[..., T]) -> Callable[..., T]:
|
|
21
|
+
def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
|
|
22
22
|
@functools.wraps(func)
|
|
23
|
-
async def wrapper(*args, **kwargs):
|
|
24
|
-
last_exception = None
|
|
23
|
+
async def wrapper(*args: Any, **kwargs: Any) -> T:
|
|
24
|
+
last_exception: Exception | None = None
|
|
25
25
|
current_delay = delay
|
|
26
26
|
|
|
27
27
|
for attempt in range(max_attempts):
|
|
@@ -35,7 +35,9 @@ def retry_async(max_attempts: int = 3, delay: float = 1.0, backoff: float = 2.0)
|
|
|
35
35
|
else:
|
|
36
36
|
raise
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
if last_exception:
|
|
39
|
+
raise last_exception
|
|
40
|
+
raise RuntimeError("Retry logic failed without exception")
|
|
39
41
|
|
|
40
42
|
return wrapper
|
|
41
43
|
|
|
@@ -169,13 +171,14 @@ STORAGE_METRICS = StorageMetrics()
|
|
|
169
171
|
def track_metrics(operation: str):
|
|
170
172
|
"""Decorator to track storage operation metrics."""
|
|
171
173
|
|
|
172
|
-
def decorator(func: Callable[..., T]) -> Callable[..., T]:
|
|
174
|
+
def decorator(func: Callable[..., Awaitable[T]] | Callable[..., T]) -> Callable[..., Awaitable[T]] | Callable[..., T]:
|
|
173
175
|
@functools.wraps(func)
|
|
174
|
-
async def async_wrapper(*args, **kwargs):
|
|
176
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> T:
|
|
175
177
|
start_time = time.time()
|
|
176
178
|
success = False
|
|
177
179
|
try:
|
|
178
|
-
|
|
180
|
+
async_func = cast(Callable[..., Awaitable[T]], func)
|
|
181
|
+
result = await async_func(*args, **kwargs)
|
|
179
182
|
success = True
|
|
180
183
|
return result
|
|
181
184
|
finally:
|
|
@@ -183,11 +186,12 @@ def track_metrics(operation: str):
|
|
|
183
186
|
STORAGE_METRICS.record_operation(operation, duration, success)
|
|
184
187
|
|
|
185
188
|
@functools.wraps(func)
|
|
186
|
-
def sync_wrapper(*args, **kwargs):
|
|
189
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> T:
|
|
187
190
|
start_time = time.time()
|
|
188
191
|
success = False
|
|
189
192
|
try:
|
|
190
|
-
|
|
193
|
+
sync_func = cast(Callable[..., T], func)
|
|
194
|
+
result = sync_func(*args, **kwargs)
|
|
191
195
|
success = True
|
|
192
196
|
return result
|
|
193
197
|
finally:
|