synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import time
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
7
|
+
from .. import SessionTracer
|
|
8
|
+
from ..abstractions import EnvironmentEvent, LMCAISEvent, RuntimeEvent, TimeRecord
|
|
9
|
+
from ..turso.daemon import SqldDaemon
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
async def simulate_llm_call(model: str, prompt: str) -> dict:
|
|
12
|
+
async def simulate_llm_call(model: str, prompt: str) -> dict[str, Any]:
|
|
12
13
|
"""Simulate an LLM API call."""
|
|
13
14
|
await asyncio.sleep(0.1) # Simulate network latency
|
|
14
15
|
|
|
@@ -133,6 +134,9 @@ async def main():
|
|
|
133
134
|
print("\n--- Example 3: Querying Data ---")
|
|
134
135
|
|
|
135
136
|
# Get model usage statistics
|
|
137
|
+
if tracer.db is None:
|
|
138
|
+
raise RuntimeError("Tracer database backend is not initialized")
|
|
139
|
+
|
|
136
140
|
model_usage = await tracer.db.get_model_usage()
|
|
137
141
|
print("\nModel Usage:")
|
|
138
142
|
print(model_usage)
|
|
@@ -150,9 +154,10 @@ async def main():
|
|
|
150
154
|
# Get specific session details
|
|
151
155
|
if recent_sessions:
|
|
152
156
|
session_detail = await tracer.db.get_session_trace(recent_sessions[0]["session_id"])
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
157
|
+
if session_detail:
|
|
158
|
+
print(f"\nSession Detail for {session_detail['session_id']}:")
|
|
159
|
+
print(f" Created: {session_detail['created_at']}")
|
|
160
|
+
print(f" Timesteps: {len(session_detail['timesteps'])}")
|
|
156
161
|
|
|
157
162
|
# Example 4: Using hooks
|
|
158
163
|
print("\n--- Example 4: Hooks ---")
|
|
@@ -166,8 +171,9 @@ async def main():
|
|
|
166
171
|
|
|
167
172
|
tracer.hooks.register("event_recorded", count_events, name="event_counter")
|
|
168
173
|
|
|
169
|
-
async with
|
|
170
|
-
"
|
|
174
|
+
async with (
|
|
175
|
+
tracer.session(metadata={"example": "hooks"}) as session_id,
|
|
176
|
+
tracer.timestep("hook_test"),
|
|
171
177
|
):
|
|
172
178
|
for i in range(3):
|
|
173
179
|
event = RuntimeEvent(
|
synth_ai/tracing_v3/hooks.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
1
|
"""Hook system for extending tracing functionality.
|
|
3
2
|
|
|
4
3
|
The hook system provides a flexible way to extend the tracing system without
|
|
@@ -33,6 +32,8 @@ Common Use Cases:
|
|
|
33
32
|
- Custom filtering and sampling
|
|
34
33
|
"""
|
|
35
34
|
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
36
37
|
import asyncio
|
|
37
38
|
from collections.abc import Callable
|
|
38
39
|
from dataclasses import dataclass
|
|
@@ -88,9 +89,9 @@ class HookManager:
|
|
|
88
89
|
self,
|
|
89
90
|
event: str,
|
|
90
91
|
callback: Callable,
|
|
91
|
-
name: str = None,
|
|
92
|
+
name: str | None = None,
|
|
92
93
|
priority: int = 0,
|
|
93
|
-
event_types: list[str] = None,
|
|
94
|
+
event_types: list[str] | None = None,
|
|
94
95
|
) -> Hook:
|
|
95
96
|
"""Register a new hook.
|
|
96
97
|
|
|
@@ -114,7 +115,7 @@ class HookManager:
|
|
|
114
115
|
raise ValueError(f"Unknown hook event: {event}")
|
|
115
116
|
|
|
116
117
|
hook = Hook(
|
|
117
|
-
name=name or callback
|
|
118
|
+
name=name or getattr(callback, "__name__", "unknown"),
|
|
118
119
|
callback=callback,
|
|
119
120
|
event_types=event_types,
|
|
120
121
|
priority=priority,
|
|
@@ -202,6 +203,7 @@ def create_default_hooks() -> HookManager:
|
|
|
202
203
|
# Example: Log session starts - useful for debugging and monitoring
|
|
203
204
|
async def log_session_start(session_id: str, metadata: dict[str, Any]):
|
|
204
205
|
import os
|
|
206
|
+
|
|
205
207
|
if os.getenv("SYNTH_TRACE_VERBOSE", "0") in ("1", "true", "True"):
|
|
206
208
|
print(f"Session started: {session_id}")
|
|
207
209
|
|
|
@@ -4,12 +4,14 @@ This module provides utilities to convert vendor responses to LLMCallRecord
|
|
|
4
4
|
format and compute aggregates from call records.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
7
9
|
import uuid
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from typing import Any, TypedDict, cast
|
|
10
13
|
|
|
11
|
-
from
|
|
12
|
-
from synth_ai.tracing_v3.lm_call_record_abstractions import (
|
|
14
|
+
from .lm_call_record_abstractions import (
|
|
13
15
|
LLMCallRecord,
|
|
14
16
|
LLMChunk,
|
|
15
17
|
LLMContentPart,
|
|
@@ -20,6 +22,63 @@ from synth_ai.tracing_v3.lm_call_record_abstractions import (
|
|
|
20
22
|
)
|
|
21
23
|
|
|
22
24
|
|
|
25
|
+
class BaseLMResponse:
|
|
26
|
+
"""
|
|
27
|
+
Standard response format from language model API calls.
|
|
28
|
+
|
|
29
|
+
This is a simple dataclass-like object for compatibility with tracing helpers.
|
|
30
|
+
Can be used as a dict-like object or with attributes.
|
|
31
|
+
|
|
32
|
+
Attributes:
|
|
33
|
+
raw_response: The raw text response from the model
|
|
34
|
+
structured_output: Optional parsed Pydantic model if structured output was requested
|
|
35
|
+
tool_calls: Optional list of tool calls if tools were provided
|
|
36
|
+
response_id: Optional response ID for thread management (Responses API)
|
|
37
|
+
reasoning: Optional reasoning trace from the model (o1 models)
|
|
38
|
+
api_type: Optional API type used ("chat", "responses", or "harmony")
|
|
39
|
+
usage: Optional usage dictionary with token counts and costs
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
raw_response: str,
|
|
45
|
+
structured_output: Any | None = None,
|
|
46
|
+
tool_calls: list[dict] | None = None,
|
|
47
|
+
response_id: str | None = None,
|
|
48
|
+
reasoning: str | None = None,
|
|
49
|
+
api_type: str | None = None,
|
|
50
|
+
usage: dict[str, Any] | None = None,
|
|
51
|
+
):
|
|
52
|
+
self.raw_response = raw_response
|
|
53
|
+
self.structured_output = structured_output
|
|
54
|
+
self.tool_calls = tool_calls
|
|
55
|
+
self.response_id = response_id
|
|
56
|
+
self.reasoning = reasoning
|
|
57
|
+
self.api_type = api_type
|
|
58
|
+
self.usage = usage
|
|
59
|
+
|
|
60
|
+
def __getitem__(self, key: str) -> Any:
|
|
61
|
+
"""Allow dict-like access for backward compatibility."""
|
|
62
|
+
return getattr(self, key)
|
|
63
|
+
|
|
64
|
+
def get(self, key: str, default: Any = None) -> Any:
|
|
65
|
+
"""Allow dict-like .get() for backward compatibility."""
|
|
66
|
+
return getattr(self, key, default)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class _UsageDict(TypedDict, total=False):
|
|
70
|
+
prompt_tokens: int
|
|
71
|
+
completion_tokens: int
|
|
72
|
+
total_tokens: int
|
|
73
|
+
reasoning_tokens: int
|
|
74
|
+
cost_usd: float
|
|
75
|
+
duration_ms: int
|
|
76
|
+
reasoning_input_tokens: int
|
|
77
|
+
reasoning_output_tokens: int
|
|
78
|
+
cache_write_tokens: int
|
|
79
|
+
cache_read_tokens: int
|
|
80
|
+
|
|
81
|
+
|
|
23
82
|
def create_llm_call_record_from_response(
|
|
24
83
|
response: BaseLMResponse,
|
|
25
84
|
model_name: str,
|
|
@@ -110,9 +169,10 @@ def create_llm_call_record_from_response(
|
|
|
110
169
|
)
|
|
111
170
|
|
|
112
171
|
# Extract tool calls if present
|
|
113
|
-
output_tool_calls = []
|
|
114
|
-
|
|
115
|
-
|
|
172
|
+
output_tool_calls: list[ToolCallSpec] = []
|
|
173
|
+
tool_calls_data = cast(list[dict[str, Any]] | None, getattr(response, "tool_calls", None))
|
|
174
|
+
if tool_calls_data:
|
|
175
|
+
for idx, tool_call in enumerate(tool_calls_data):
|
|
116
176
|
if isinstance(tool_call, dict):
|
|
117
177
|
output_tool_calls.append(
|
|
118
178
|
ToolCallSpec(
|
|
@@ -125,18 +185,19 @@ def create_llm_call_record_from_response(
|
|
|
125
185
|
|
|
126
186
|
# Extract usage information
|
|
127
187
|
usage = None
|
|
128
|
-
|
|
188
|
+
usage_data = cast(_UsageDict | None, getattr(response, "usage", None))
|
|
189
|
+
if usage_data:
|
|
129
190
|
usage = LLMUsage(
|
|
130
|
-
input_tokens=
|
|
131
|
-
output_tokens=
|
|
132
|
-
total_tokens=
|
|
133
|
-
cost_usd=
|
|
191
|
+
input_tokens=usage_data.get("input_tokens"),
|
|
192
|
+
output_tokens=usage_data.get("output_tokens"),
|
|
193
|
+
total_tokens=usage_data.get("total_tokens"),
|
|
194
|
+
cost_usd=usage_data.get("cost_usd"),
|
|
134
195
|
# Additional token accounting if available
|
|
135
|
-
reasoning_tokens=
|
|
136
|
-
reasoning_input_tokens=
|
|
137
|
-
reasoning_output_tokens=
|
|
138
|
-
cache_write_tokens=
|
|
139
|
-
cache_read_tokens=
|
|
196
|
+
reasoning_tokens=usage_data.get("reasoning_tokens"),
|
|
197
|
+
reasoning_input_tokens=usage_data.get("reasoning_input_tokens"),
|
|
198
|
+
reasoning_output_tokens=usage_data.get("reasoning_output_tokens"),
|
|
199
|
+
cache_write_tokens=usage_data.get("cache_write_tokens"),
|
|
200
|
+
cache_read_tokens=usage_data.get("cache_read_tokens"),
|
|
140
201
|
)
|
|
141
202
|
|
|
142
203
|
# Build request parameters
|
|
@@ -149,11 +210,15 @@ def create_llm_call_record_from_response(
|
|
|
149
210
|
)
|
|
150
211
|
|
|
151
212
|
# Handle response-specific fields
|
|
152
|
-
finish_reason = None
|
|
213
|
+
finish_reason: str | None = None
|
|
153
214
|
if hasattr(response, "finish_reason"):
|
|
154
|
-
finish_reason = response
|
|
215
|
+
finish_reason = getattr(response, "finish_reason", None)
|
|
216
|
+
if finish_reason is not None:
|
|
217
|
+
finish_reason = str(finish_reason)
|
|
155
218
|
elif hasattr(response, "stop_reason"):
|
|
156
|
-
|
|
219
|
+
stop_reason = getattr(response, "stop_reason", None)
|
|
220
|
+
if stop_reason is not None:
|
|
221
|
+
finish_reason = str(stop_reason)
|
|
157
222
|
|
|
158
223
|
# Create the call record
|
|
159
224
|
record = LLMCallRecord(
|
|
@@ -161,8 +226,8 @@ def create_llm_call_record_from_response(
|
|
|
161
226
|
api_type=api_type,
|
|
162
227
|
provider=provider,
|
|
163
228
|
model_name=model_name,
|
|
164
|
-
started_at=started_at or datetime.
|
|
165
|
-
completed_at=completed_at or datetime.
|
|
229
|
+
started_at=started_at or datetime.now(UTC),
|
|
230
|
+
completed_at=completed_at or datetime.now(UTC),
|
|
166
231
|
latency_ms=latency_ms,
|
|
167
232
|
request_params=params,
|
|
168
233
|
input_messages=input_messages,
|
|
@@ -188,7 +253,45 @@ def create_llm_call_record_from_response(
|
|
|
188
253
|
return record
|
|
189
254
|
|
|
190
255
|
|
|
191
|
-
|
|
256
|
+
@dataclass
|
|
257
|
+
class _AggregateAccumulator:
|
|
258
|
+
"""Mutable accumulator for call record aggregates."""
|
|
259
|
+
|
|
260
|
+
call_count: int = 0
|
|
261
|
+
input_tokens: int = 0
|
|
262
|
+
output_tokens: int = 0
|
|
263
|
+
total_tokens: int = 0
|
|
264
|
+
reasoning_tokens: int = 0
|
|
265
|
+
cost_usd: float = 0.0
|
|
266
|
+
latency_ms: int = 0
|
|
267
|
+
models_used: set[str] = field(default_factory=set)
|
|
268
|
+
providers_used: set[str] = field(default_factory=set)
|
|
269
|
+
tool_calls_count: int = 0
|
|
270
|
+
error_count: int = 0
|
|
271
|
+
success_count: int = 0
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class AggregateSummary(TypedDict, total=False):
|
|
275
|
+
"""Aggregate metrics derived from call records."""
|
|
276
|
+
|
|
277
|
+
call_count: int
|
|
278
|
+
input_tokens: int
|
|
279
|
+
output_tokens: int
|
|
280
|
+
total_tokens: int
|
|
281
|
+
reasoning_tokens: int
|
|
282
|
+
cost_usd: float
|
|
283
|
+
latency_ms: int
|
|
284
|
+
models_used: list[str]
|
|
285
|
+
providers_used: list[str]
|
|
286
|
+
tool_calls_count: int
|
|
287
|
+
error_count: int
|
|
288
|
+
success_count: int
|
|
289
|
+
avg_latency_ms: float
|
|
290
|
+
avg_input_tokens: float
|
|
291
|
+
avg_output_tokens: float
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def compute_aggregates_from_call_records(call_records: list[LLMCallRecord]) -> AggregateSummary:
|
|
192
295
|
"""Compute aggregate statistics from a list of LLMCallRecord instances.
|
|
193
296
|
|
|
194
297
|
Args:
|
|
@@ -197,65 +300,62 @@ def compute_aggregates_from_call_records(call_records: list[LLMCallRecord]) -> d
|
|
|
197
300
|
Returns:
|
|
198
301
|
Dictionary containing aggregated statistics
|
|
199
302
|
"""
|
|
200
|
-
aggregates =
|
|
201
|
-
"input_tokens": 0,
|
|
202
|
-
"output_tokens": 0,
|
|
203
|
-
"total_tokens": 0,
|
|
204
|
-
"reasoning_tokens": 0,
|
|
205
|
-
"cost_usd": 0.0,
|
|
206
|
-
"latency_ms": 0,
|
|
207
|
-
"models_used": set(),
|
|
208
|
-
"providers_used": set(),
|
|
209
|
-
"tool_calls_count": 0,
|
|
210
|
-
"error_count": 0,
|
|
211
|
-
"success_count": 0,
|
|
212
|
-
"call_count": len(call_records),
|
|
213
|
-
}
|
|
303
|
+
aggregates = _AggregateAccumulator(call_count=len(call_records))
|
|
214
304
|
|
|
215
305
|
for record in call_records:
|
|
216
306
|
# Token aggregation
|
|
217
307
|
if record.usage:
|
|
218
308
|
if record.usage.input_tokens:
|
|
219
|
-
aggregates
|
|
309
|
+
aggregates.input_tokens += record.usage.input_tokens
|
|
220
310
|
if record.usage.output_tokens:
|
|
221
|
-
aggregates
|
|
311
|
+
aggregates.output_tokens += record.usage.output_tokens
|
|
222
312
|
if record.usage.total_tokens:
|
|
223
|
-
aggregates
|
|
313
|
+
aggregates.total_tokens += record.usage.total_tokens
|
|
224
314
|
if record.usage.reasoning_tokens:
|
|
225
|
-
aggregates
|
|
315
|
+
aggregates.reasoning_tokens += record.usage.reasoning_tokens
|
|
226
316
|
if record.usage.cost_usd:
|
|
227
|
-
aggregates
|
|
317
|
+
aggregates.cost_usd += record.usage.cost_usd
|
|
228
318
|
|
|
229
319
|
# Latency aggregation
|
|
230
|
-
if record.latency_ms:
|
|
231
|
-
aggregates
|
|
320
|
+
if record.latency_ms is not None:
|
|
321
|
+
aggregates.latency_ms += record.latency_ms
|
|
232
322
|
|
|
233
323
|
# Model and provider tracking
|
|
234
324
|
if record.model_name:
|
|
235
|
-
aggregates
|
|
325
|
+
aggregates.models_used.add(record.model_name)
|
|
236
326
|
if record.provider:
|
|
237
|
-
aggregates
|
|
327
|
+
aggregates.providers_used.add(record.provider)
|
|
238
328
|
|
|
239
329
|
# Tool calls
|
|
240
|
-
aggregates
|
|
330
|
+
aggregates.tool_calls_count += len(record.output_tool_calls)
|
|
241
331
|
|
|
242
332
|
# Success/error tracking
|
|
243
333
|
if record.outcome == "error":
|
|
244
|
-
aggregates
|
|
334
|
+
aggregates.error_count += 1
|
|
245
335
|
elif record.outcome == "success":
|
|
246
|
-
aggregates
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
336
|
+
aggregates.success_count += 1
|
|
337
|
+
|
|
338
|
+
summary: AggregateSummary = {
|
|
339
|
+
"call_count": aggregates.call_count,
|
|
340
|
+
"input_tokens": aggregates.input_tokens,
|
|
341
|
+
"output_tokens": aggregates.output_tokens,
|
|
342
|
+
"total_tokens": aggregates.total_tokens,
|
|
343
|
+
"reasoning_tokens": aggregates.reasoning_tokens,
|
|
344
|
+
"cost_usd": aggregates.cost_usd,
|
|
345
|
+
"latency_ms": aggregates.latency_ms,
|
|
346
|
+
"models_used": list(aggregates.models_used),
|
|
347
|
+
"providers_used": list(aggregates.providers_used),
|
|
348
|
+
"tool_calls_count": aggregates.tool_calls_count,
|
|
349
|
+
"error_count": aggregates.error_count,
|
|
350
|
+
"success_count": aggregates.success_count,
|
|
351
|
+
}
|
|
251
352
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
aggregates["avg_output_tokens"] = aggregates["output_tokens"] / aggregates["call_count"]
|
|
353
|
+
if aggregates.call_count > 0:
|
|
354
|
+
summary["avg_latency_ms"] = aggregates.latency_ms / aggregates.call_count
|
|
355
|
+
summary["avg_input_tokens"] = aggregates.input_tokens / aggregates.call_count
|
|
356
|
+
summary["avg_output_tokens"] = aggregates.output_tokens / aggregates.call_count
|
|
257
357
|
|
|
258
|
-
return
|
|
358
|
+
return summary
|
|
259
359
|
|
|
260
360
|
|
|
261
361
|
def create_llm_call_record_from_streaming(
|
|
@@ -322,8 +422,8 @@ def create_llm_call_record_from_streaming(
|
|
|
322
422
|
api_type="responses", # Streaming typically from Responses API
|
|
323
423
|
provider=provider,
|
|
324
424
|
model_name=model_name,
|
|
325
|
-
started_at=started_at or datetime.
|
|
326
|
-
completed_at=completed_at or datetime.
|
|
425
|
+
started_at=started_at or datetime.now(UTC),
|
|
426
|
+
completed_at=completed_at or datetime.now(UTC),
|
|
327
427
|
latency_ms=latency_ms,
|
|
328
428
|
request_params=params,
|
|
329
429
|
input_messages=input_messages,
|
|
@@ -68,7 +68,7 @@ def categorize_files(v2_files: list[tuple[str, list[str]]]) -> dict:
|
|
|
68
68
|
categories["examples"].append((file_path, imports))
|
|
69
69
|
elif any(
|
|
70
70
|
core in file_path
|
|
71
|
-
for core in ["synth_ai/lm/", "synth_ai/
|
|
71
|
+
for core in ["synth_ai/lm/", "synth_ai/environments/"]
|
|
72
72
|
):
|
|
73
73
|
categories["core_library"].append((file_path, imports))
|
|
74
74
|
else:
|
|
@@ -104,7 +104,6 @@ def print_migration_report():
|
|
|
104
104
|
print("2. Debug scripts: Can be deleted or archived")
|
|
105
105
|
print("3. Core library files: Need careful migration to v3")
|
|
106
106
|
print(" - synth_ai/lm/core/main_v2.py")
|
|
107
|
-
print(" - synth_ai/tui/cli/query_experiments.py")
|
|
108
107
|
print(" - synth_ai/environments/service/core_routes.py")
|
|
109
108
|
print("4. Examples: Should be updated to demonstrate v3 usage")
|
|
110
109
|
|
|
@@ -25,14 +25,15 @@ application to continue without blocking on sync operations.
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
import asyncio
|
|
28
|
+
import importlib
|
|
28
29
|
import logging
|
|
29
|
-
|
|
30
|
-
import libsql
|
|
30
|
+
from typing import Any, cast
|
|
31
31
|
|
|
32
32
|
from .config import CONFIG
|
|
33
33
|
|
|
34
34
|
logger = logging.getLogger(__name__)
|
|
35
35
|
|
|
36
|
+
libsql = cast(Any, importlib.import_module("libsql"))
|
|
36
37
|
|
|
37
38
|
class ReplicaSync:
|
|
38
39
|
"""Manages synchronization of embedded SQLite replica with remote Turso database.
|
|
@@ -52,7 +53,7 @@ class ReplicaSync:
|
|
|
52
53
|
db_path: str = "embedded.db",
|
|
53
54
|
sync_url: str | None = None,
|
|
54
55
|
auth_token: str | None = None,
|
|
55
|
-
sync_interval:
|
|
56
|
+
sync_interval: float | None = None,
|
|
56
57
|
):
|
|
57
58
|
"""Initialize replica sync manager.
|
|
58
59
|
|
|
@@ -66,8 +67,8 @@ class ReplicaSync:
|
|
|
66
67
|
self.sync_url = sync_url or CONFIG.sync_url
|
|
67
68
|
self.auth_token = auth_token or CONFIG.auth_token
|
|
68
69
|
self.sync_interval = sync_interval or CONFIG.sync_interval
|
|
69
|
-
self._sync_task: asyncio.Task | None = None
|
|
70
|
-
self._conn:
|
|
70
|
+
self._sync_task: asyncio.Task[Any] | None = None
|
|
71
|
+
self._conn: Any | None = None
|
|
71
72
|
|
|
72
73
|
def _ensure_connection(self):
|
|
73
74
|
"""Ensure libsql connection is established.
|
|
@@ -113,8 +114,11 @@ class ReplicaSync:
|
|
|
113
114
|
"""
|
|
114
115
|
try:
|
|
115
116
|
self._ensure_connection()
|
|
117
|
+
conn = self._conn
|
|
118
|
+
if conn is None:
|
|
119
|
+
raise RuntimeError("Replica sync connection is not available after initialization")
|
|
116
120
|
# Run sync in thread pool since libsql sync is blocking
|
|
117
|
-
await asyncio.to_thread(
|
|
121
|
+
await asyncio.to_thread(conn.sync)
|
|
118
122
|
logger.info("Successfully synced with remote Turso database")
|
|
119
123
|
return True
|
|
120
124
|
except Exception as e:
|
|
@@ -146,7 +150,7 @@ class ReplicaSync:
|
|
|
146
150
|
# Sleep until next sync interval
|
|
147
151
|
await asyncio.sleep(self.sync_interval)
|
|
148
152
|
|
|
149
|
-
def start_background_sync(self) -> asyncio.Task:
|
|
153
|
+
def start_background_sync(self) -> asyncio.Task[Any]:
|
|
150
154
|
"""Start the background sync task.
|
|
151
155
|
|
|
152
156
|
Creates an asyncio task that runs the sync loop. The task is stored
|
|
@@ -180,6 +184,7 @@ class ReplicaSync:
|
|
|
180
184
|
# Request cancellation
|
|
181
185
|
self._sync_task.cancel()
|
|
182
186
|
import contextlib
|
|
187
|
+
|
|
183
188
|
with contextlib.suppress(asyncio.CancelledError):
|
|
184
189
|
# Wait for the task to finish
|
|
185
190
|
await self._sync_task
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""HTTP-safe serialization helpers for tracing v3.
|
|
2
|
+
|
|
3
|
+
These utilities normalize tracing structures (including dataclasses) into
|
|
4
|
+
JSON-serializable forms and provide a compact JSON encoder suitable for
|
|
5
|
+
HTTP transmission to backend services.
|
|
6
|
+
|
|
7
|
+
Design goals:
|
|
8
|
+
- Preserve structure while ensuring standard-compliant JSON (no NaN/Infinity)
|
|
9
|
+
- Handle common non-JSON types: datetime, Decimal, bytes, set/tuple, numpy scalars
|
|
10
|
+
- Keep output compact (no unnecessary whitespace) while readable if needed
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import base64
|
|
16
|
+
import json
|
|
17
|
+
from dataclasses import asdict, is_dataclass
|
|
18
|
+
from datetime import date, datetime
|
|
19
|
+
from decimal import Decimal
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import numpy as _np # type: ignore
|
|
25
|
+
except Exception: # pragma: no cover - numpy optional at runtime
|
|
26
|
+
_np = None # type: ignore
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def normalize_for_json(value: Any) -> Any:
|
|
30
|
+
"""Return a JSON-serializable version of ``value``.
|
|
31
|
+
|
|
32
|
+
Rules:
|
|
33
|
+
- dataclass → dict (recursively normalized)
|
|
34
|
+
- datetime/date → ISO-8601 string (UTC-aware datetimes preserve tzinfo)
|
|
35
|
+
- Decimal → float (fallback to string if not finite)
|
|
36
|
+
- bytes/bytearray → base64 string (RFC 4648)
|
|
37
|
+
- set/tuple → list
|
|
38
|
+
- Enum → enum.value (normalized)
|
|
39
|
+
- numpy scalar → corresponding Python scalar
|
|
40
|
+
- float NaN/Inf/−Inf → None (to keep JSON standard compliant)
|
|
41
|
+
- dict / list → recursively normalized
|
|
42
|
+
- other primitives (str, int, bool, None, float) passed through
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
# Dataclasses
|
|
46
|
+
if is_dataclass(value) and not isinstance(value, type):
|
|
47
|
+
try:
|
|
48
|
+
return normalize_for_json(asdict(value))
|
|
49
|
+
except Exception:
|
|
50
|
+
# Fallback: best-effort conversion via __dict__
|
|
51
|
+
return normalize_for_json(getattr(value, "__dict__", {}))
|
|
52
|
+
|
|
53
|
+
# Mapping
|
|
54
|
+
if isinstance(value, dict):
|
|
55
|
+
return {str(k): normalize_for_json(v) for k, v in value.items()}
|
|
56
|
+
|
|
57
|
+
# Sequences
|
|
58
|
+
if isinstance(value, list | tuple | set):
|
|
59
|
+
return [normalize_for_json(v) for v in value]
|
|
60
|
+
|
|
61
|
+
# Datetime / Date
|
|
62
|
+
if isinstance(value, datetime | date):
|
|
63
|
+
return value.isoformat()
|
|
64
|
+
|
|
65
|
+
# Decimal
|
|
66
|
+
if isinstance(value, Decimal):
|
|
67
|
+
try:
|
|
68
|
+
f = float(value)
|
|
69
|
+
if f != f or f in (float("inf"), float("-inf")):
|
|
70
|
+
return str(value)
|
|
71
|
+
return f
|
|
72
|
+
except Exception:
|
|
73
|
+
return str(value)
|
|
74
|
+
|
|
75
|
+
# Bytes-like
|
|
76
|
+
if isinstance(value, bytes | bytearray):
|
|
77
|
+
return base64.b64encode(bytes(value)).decode("ascii")
|
|
78
|
+
|
|
79
|
+
# Enum
|
|
80
|
+
if isinstance(value, Enum):
|
|
81
|
+
return normalize_for_json(value.value)
|
|
82
|
+
|
|
83
|
+
# Numpy scalars / arrays
|
|
84
|
+
if _np is not None:
|
|
85
|
+
if isinstance(value, _np.generic): # type: ignore[attr-defined]
|
|
86
|
+
return normalize_for_json(value.item())
|
|
87
|
+
if isinstance(value, _np.ndarray):
|
|
88
|
+
return normalize_for_json(value.tolist())
|
|
89
|
+
|
|
90
|
+
# Floats: sanitize NaN / Infinity to None
|
|
91
|
+
if isinstance(value, float):
|
|
92
|
+
if value != value or value in (float("inf"), float("-inf")):
|
|
93
|
+
return None
|
|
94
|
+
return value
|
|
95
|
+
|
|
96
|
+
return value
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def dumps_http_json(payload: Any) -> str:
    """Encode ``payload`` as a compact JSON string for HTTP transport.

    The payload is first passed through ``normalize_for_json`` so that
    non-JSON types are converted. Encoding then:

    - rejects NaN/Infinity (``allow_nan=False``)
    - emits non-ASCII characters as-is (``ensure_ascii=False``)
    - uses ``","`` and ``":"`` separators with no padding whitespace
    """

    encoder_options: dict[str, Any] = {
        "ensure_ascii": False,
        "allow_nan": False,
        "separators": (",", ":"),
    }
    return json.dumps(normalize_for_json(payload), **encoder_options)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def serialize_trace_for_http(trace: Any) -> str:
    """Turn a tracing v3 session (or plain dict/list) into HTTP-safe JSON.

    Dataclass instances are converted with ``asdict`` before encoding; if
    that conversion or the encoding raises, the instance ``__dict__`` (or an
    empty dict when absent) is encoded instead. Any other input is handed
    straight to ``dumps_http_json``.
    """

    # Non-dataclass values (and dataclass *classes*) need no special handling.
    if not is_dataclass(trace) or isinstance(trace, type):
        return dumps_http_json(trace)

    try:
        as_mapping = asdict(trace)
        return dumps_http_json(as_mapping)
    except Exception:
        # Best-effort fallback to the raw instance attributes.
        raw_attrs = getattr(trace, "__dict__", {})
        return dumps_http_json(raw_attrs)
|
|
129
|
+
|
|
130
|
+
|