synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1293 @@
|
|
|
1
|
+
"""LibSQL-native trace manager prototype.
|
|
2
|
+
|
|
3
|
+
This module provides the Turso/libsql-backed trace storage implementation. It
|
|
4
|
+
mirrors the public surface area of the historical SQLAlchemy manager while
|
|
5
|
+
executing all operations directly via libsql.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
from dataclasses import asdict, dataclass
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
19
|
+
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
|
20
|
+
|
|
21
|
+
import httpx
|
|
22
|
+
import libsql
|
|
23
|
+
from sqlalchemy.engine import make_url
|
|
24
|
+
|
|
25
|
+
from ..abstractions import (
|
|
26
|
+
EnvironmentEvent,
|
|
27
|
+
LMCAISEvent,
|
|
28
|
+
RuntimeEvent,
|
|
29
|
+
SessionMessageContent,
|
|
30
|
+
SessionTrace,
|
|
31
|
+
)
|
|
32
|
+
from ..config import CONFIG
|
|
33
|
+
from ..storage.base import TraceStorage
|
|
34
|
+
from .models import analytics_views
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from sqlite3 import Connection as LibsqlConnection
|
|
38
|
+
else: # pragma: no cover - runtime fallback for typing only
|
|
39
|
+
LibsqlConnection = Any # type: ignore[assignment]
|
|
40
|
+
|
|
41
|
+
_LIBSQL_CONNECT_ATTR = getattr(libsql, "connect", None)
|
|
42
|
+
if _LIBSQL_CONNECT_ATTR is None: # pragma: no cover - defensive guard
|
|
43
|
+
raise RuntimeError("libsql.connect is required for NativeLibsqlTraceManager")
|
|
44
|
+
_libsql_connect: Callable[..., LibsqlConnection] = cast(
|
|
45
|
+
Callable[..., LibsqlConnection],
|
|
46
|
+
_LIBSQL_CONNECT_ATTR,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
try: # pragma: no cover - exercised only when pandas present
|
|
50
|
+
import pandas as pd # type: ignore
|
|
51
|
+
except Exception: # pragma: no cover
|
|
52
|
+
pd = None # type: ignore[assignment]
|
|
53
|
+
|
|
54
|
+
logger = logging.getLogger(__name__)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(slots=True)
|
|
58
|
+
class _ConnectionTarget:
|
|
59
|
+
"""Resolved connection target for libsql."""
|
|
60
|
+
|
|
61
|
+
database: str
|
|
62
|
+
sync_url: str | None = None
|
|
63
|
+
auth_token: str | None = None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _strip_auth_component(url: str) -> tuple[str, str | None]:
|
|
67
|
+
"""Remove auth_token query parameter from URL, returning the token separately."""
|
|
68
|
+
parsed = urlparse(url)
|
|
69
|
+
if not parsed.query:
|
|
70
|
+
return url, None
|
|
71
|
+
|
|
72
|
+
params = dict(parse_qsl(parsed.query, keep_blank_values=True))
|
|
73
|
+
token = params.pop("auth_token", None)
|
|
74
|
+
query = urlencode(params, doseq=True)
|
|
75
|
+
sanitised = urlunparse(parsed._replace(query=query))
|
|
76
|
+
return sanitised, token
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _resolve_connection_target(db_url: str | None, auth_token: str | None) -> _ConnectionTarget:
    """Normalise the configured database URL into a libsql connection target.

    Args:
        db_url: Explicit database URL; ``CONFIG.db_url`` is used when this is falsy.
        auth_token: Explicit auth token. It takes precedence over a token embedded
            in the URL's ``auth_token`` query parameter, which in turn takes
            precedence over ``CONFIG.auth_token``.

    Returns:
        The resolved ``_ConnectionTarget``. Plain SQLite targets carry neither
        ``sync_url`` nor ``auth_token``; ``libsql://``, ``http://`` and
        ``https://`` targets use the URL as both ``database`` and ``sync_url``.

    Raises:
        RuntimeError: For the retired ``sqlite+libsql://`` scheme, for a SQLite
            URL without a database path, or for a URL matching no supported form.
    """
    url = db_url or CONFIG.db_url
    sanitised, token_from_url = _strip_auth_component(url)
    effective_token = auth_token or token_from_url or CONFIG.auth_token

    # SQLAlchemy-compatible libsql scheme (`sqlite+libsql://<endpoint or path>`)
    if sanitised.startswith("sqlite+libsql://"):
        raise RuntimeError("sqlite+libsql scheme is no longer supported; use libsql://")

    # Plain SQLite files: file://, /absolute/path, or relative path
    # libsql.connect() handles these without sync_url or auth_token
    if sanitised.startswith(("file://", "/")) or "://" not in sanitised:
        # Strip only a leading file:// prefix; the rest of the path is kept verbatim.
        db_path = sanitised.removeprefix("file://")
        return _ConnectionTarget(database=db_path, sync_url=None, auth_token=None)

    # Native libsql URLs (`libsql://...`).
    if sanitised.startswith("libsql://"):
        return _ConnectionTarget(database=sanitised, sync_url=sanitised, auth_token=effective_token)

    # Fallback to SQLAlchemy URL parsing for anything else we missed.
    try:
        parsed = make_url(sanitised)
        driver = parsed.drivername.lower()
        if driver.startswith("sqlite"):
            database = parsed.database or ""
            if database in {":memory:", ":memory"}:
                db_path = ":memory:"
            elif not database:
                raise RuntimeError("SQLite URL missing database path.")
            elif database.startswith("/"):
                # Absolute paths are passed through unchanged.
                db_path = database
            else:
                # Relative paths are resolved against the current working directory.
                db_path = str(Path(database).expanduser().resolve())
            return _ConnectionTarget(database=db_path, sync_url=None, auth_token=None)
        if driver.startswith("libsql"):
            database = parsed.render_as_string(hide_password=False)
            return _ConnectionTarget(database=database, sync_url=database, auth_token=effective_token)
    except RuntimeError:
        # Deliberate configuration errors raised above must reach the caller with
        # their specific message instead of being swallowed by the guard below.
        raise
    except Exception:  # pragma: no cover - defensive guardrail
        logger.debug("Unable to parse db_url via SQLAlchemy", exc_info=True)

    # Python libsql client uses HTTP API for http:// URLs, not Hrana WebSocket
    # For local sqld with http:// URL, we need to ensure it points to the HTTP API port
    # sqld uses two ports: Hrana WebSocket (e.g. 8080) and HTTP API (e.g. 8081)
    # libsql.connect() with http:// uses HTTP API, so URL should point to HTTP API port
    # (libsql:// URLs never reach this point; they are returned earlier.)
    if sanitised.startswith(("http://", "https://")):
        return _ConnectionTarget(database=sanitised, sync_url=sanitised, auth_token=effective_token)

    raise RuntimeError(f"Unsupported tracing database URL: {sanitised}")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _json_dumps(value: Any) -> str | None:
|
|
133
|
+
"""Serialise Python objects as JSON compatible with the existing schema."""
|
|
134
|
+
|
|
135
|
+
def _default(obj: Any):
|
|
136
|
+
if isinstance(obj, datetime):
|
|
137
|
+
return obj.isoformat()
|
|
138
|
+
return str(obj)
|
|
139
|
+
|
|
140
|
+
if value is None:
|
|
141
|
+
return None
|
|
142
|
+
return json.dumps(value, separators=(",", ":"), default=_default)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _maybe_datetime(value: Any) -> Any:
|
|
146
|
+
if value is None or isinstance(value, datetime):
|
|
147
|
+
return value
|
|
148
|
+
if isinstance(value, str):
|
|
149
|
+
try:
|
|
150
|
+
return datetime.fromisoformat(value)
|
|
151
|
+
except ValueError:
|
|
152
|
+
pass
|
|
153
|
+
return value
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _load_json(value: Any) -> Any:
|
|
157
|
+
if value is None or isinstance(value, dict | list):
|
|
158
|
+
return value or {}
|
|
159
|
+
if isinstance(value, str):
|
|
160
|
+
try:
|
|
161
|
+
return json.loads(value)
|
|
162
|
+
except (TypeError, ValueError):
|
|
163
|
+
return {}
|
|
164
|
+
return value
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
_TABLE_DEFINITIONS: tuple[str, ...] = (
|
|
168
|
+
"""
|
|
169
|
+
CREATE TABLE IF NOT EXISTS experiments (
|
|
170
|
+
experiment_id VARCHAR PRIMARY KEY,
|
|
171
|
+
name VARCHAR NOT NULL,
|
|
172
|
+
description TEXT,
|
|
173
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
174
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
175
|
+
configuration TEXT,
|
|
176
|
+
metadata TEXT
|
|
177
|
+
)
|
|
178
|
+
""",
|
|
179
|
+
"""
|
|
180
|
+
CREATE TABLE IF NOT EXISTS systems (
|
|
181
|
+
system_id VARCHAR PRIMARY KEY,
|
|
182
|
+
name VARCHAR NOT NULL,
|
|
183
|
+
system_type VARCHAR,
|
|
184
|
+
description TEXT,
|
|
185
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
186
|
+
metadata TEXT
|
|
187
|
+
)
|
|
188
|
+
""",
|
|
189
|
+
"""
|
|
190
|
+
CREATE TABLE IF NOT EXISTS system_versions (
|
|
191
|
+
version_id VARCHAR PRIMARY KEY,
|
|
192
|
+
system_id VARCHAR NOT NULL,
|
|
193
|
+
version_number VARCHAR NOT NULL,
|
|
194
|
+
commit_hash VARCHAR,
|
|
195
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
196
|
+
configuration TEXT,
|
|
197
|
+
metadata TEXT,
|
|
198
|
+
FOREIGN KEY(system_id) REFERENCES systems(system_id),
|
|
199
|
+
UNIQUE(system_id, version_number)
|
|
200
|
+
)
|
|
201
|
+
""",
|
|
202
|
+
"""
|
|
203
|
+
CREATE TABLE IF NOT EXISTS experimental_systems (
|
|
204
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
205
|
+
experiment_id VARCHAR NOT NULL,
|
|
206
|
+
system_id VARCHAR NOT NULL,
|
|
207
|
+
version_id VARCHAR NOT NULL,
|
|
208
|
+
FOREIGN KEY(experiment_id) REFERENCES experiments(experiment_id),
|
|
209
|
+
FOREIGN KEY(system_id) REFERENCES systems(system_id),
|
|
210
|
+
FOREIGN KEY(version_id) REFERENCES system_versions(version_id)
|
|
211
|
+
)
|
|
212
|
+
""",
|
|
213
|
+
"""
|
|
214
|
+
CREATE TABLE IF NOT EXISTS session_traces (
|
|
215
|
+
session_id VARCHAR PRIMARY KEY,
|
|
216
|
+
created_at DATETIME NOT NULL,
|
|
217
|
+
num_timesteps INTEGER NOT NULL,
|
|
218
|
+
num_events INTEGER NOT NULL,
|
|
219
|
+
num_messages INTEGER NOT NULL,
|
|
220
|
+
metadata TEXT,
|
|
221
|
+
experiment_id VARCHAR,
|
|
222
|
+
embedding VECTOR,
|
|
223
|
+
FOREIGN KEY(experiment_id) REFERENCES experiments(experiment_id)
|
|
224
|
+
)
|
|
225
|
+
""",
|
|
226
|
+
"""
|
|
227
|
+
CREATE TABLE IF NOT EXISTS session_timesteps (
|
|
228
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
229
|
+
session_id VARCHAR NOT NULL,
|
|
230
|
+
step_id VARCHAR NOT NULL,
|
|
231
|
+
step_index INTEGER NOT NULL,
|
|
232
|
+
turn_number INTEGER,
|
|
233
|
+
started_at DATETIME,
|
|
234
|
+
completed_at DATETIME,
|
|
235
|
+
num_events INTEGER,
|
|
236
|
+
num_messages INTEGER,
|
|
237
|
+
step_metadata TEXT,
|
|
238
|
+
UNIQUE(session_id, step_id),
|
|
239
|
+
FOREIGN KEY(session_id) REFERENCES session_traces(session_id)
|
|
240
|
+
)
|
|
241
|
+
""",
|
|
242
|
+
"""
|
|
243
|
+
CREATE TABLE IF NOT EXISTS events (
|
|
244
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
245
|
+
session_id VARCHAR NOT NULL,
|
|
246
|
+
timestep_id INTEGER,
|
|
247
|
+
event_type VARCHAR NOT NULL,
|
|
248
|
+
system_instance_id VARCHAR,
|
|
249
|
+
event_time FLOAT,
|
|
250
|
+
message_time INTEGER,
|
|
251
|
+
created_at DATETIME,
|
|
252
|
+
model_name VARCHAR,
|
|
253
|
+
provider VARCHAR,
|
|
254
|
+
input_tokens INTEGER,
|
|
255
|
+
output_tokens INTEGER,
|
|
256
|
+
total_tokens INTEGER,
|
|
257
|
+
cost_usd INTEGER,
|
|
258
|
+
latency_ms INTEGER,
|
|
259
|
+
span_id VARCHAR,
|
|
260
|
+
trace_id VARCHAR,
|
|
261
|
+
call_records TEXT,
|
|
262
|
+
reward FLOAT,
|
|
263
|
+
terminated BOOLEAN,
|
|
264
|
+
truncated BOOLEAN,
|
|
265
|
+
system_state_before TEXT,
|
|
266
|
+
system_state_after TEXT,
|
|
267
|
+
metadata TEXT,
|
|
268
|
+
event_metadata TEXT,
|
|
269
|
+
embedding VECTOR,
|
|
270
|
+
CHECK (event_type IN ('cais', 'environment', 'runtime')),
|
|
271
|
+
FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
|
|
272
|
+
FOREIGN KEY(timestep_id) REFERENCES session_timesteps(id)
|
|
273
|
+
)
|
|
274
|
+
""",
|
|
275
|
+
"""
|
|
276
|
+
CREATE TABLE IF NOT EXISTS messages (
|
|
277
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
278
|
+
session_id VARCHAR NOT NULL,
|
|
279
|
+
timestep_id INTEGER,
|
|
280
|
+
message_type VARCHAR NOT NULL,
|
|
281
|
+
content TEXT NOT NULL,
|
|
282
|
+
timestamp DATETIME,
|
|
283
|
+
event_time FLOAT,
|
|
284
|
+
message_time INTEGER,
|
|
285
|
+
metadata TEXT,
|
|
286
|
+
embedding VECTOR,
|
|
287
|
+
CHECK (message_type IN ('user', 'assistant', 'system', 'tool_use', 'tool_result')),
|
|
288
|
+
FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
|
|
289
|
+
FOREIGN KEY(timestep_id) REFERENCES session_timesteps(id)
|
|
290
|
+
)
|
|
291
|
+
""",
|
|
292
|
+
"""
|
|
293
|
+
CREATE TABLE IF NOT EXISTS outcome_rewards (
|
|
294
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
295
|
+
session_id VARCHAR NOT NULL,
|
|
296
|
+
total_reward INTEGER NOT NULL,
|
|
297
|
+
achievements_count INTEGER NOT NULL,
|
|
298
|
+
total_steps INTEGER NOT NULL,
|
|
299
|
+
created_at DATETIME NOT NULL,
|
|
300
|
+
reward_metadata TEXT,
|
|
301
|
+
FOREIGN KEY(session_id) REFERENCES session_traces(session_id)
|
|
302
|
+
)
|
|
303
|
+
""",
|
|
304
|
+
"""
|
|
305
|
+
CREATE TABLE IF NOT EXISTS event_rewards (
|
|
306
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
307
|
+
event_id INTEGER NOT NULL,
|
|
308
|
+
session_id VARCHAR NOT NULL,
|
|
309
|
+
message_id INTEGER,
|
|
310
|
+
turn_number INTEGER,
|
|
311
|
+
reward_value FLOAT NOT NULL,
|
|
312
|
+
reward_type VARCHAR,
|
|
313
|
+
"key" VARCHAR,
|
|
314
|
+
annotation TEXT,
|
|
315
|
+
source VARCHAR,
|
|
316
|
+
created_at DATETIME NOT NULL,
|
|
317
|
+
FOREIGN KEY(event_id) REFERENCES events(id),
|
|
318
|
+
FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
|
|
319
|
+
FOREIGN KEY(message_id) REFERENCES messages(id)
|
|
320
|
+
)
|
|
321
|
+
"""
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
_INDEX_DEFINITIONS: tuple[str, ...] = (
|
|
326
|
+
"CREATE INDEX IF NOT EXISTS idx_session_created ON session_traces (created_at)",
|
|
327
|
+
"CREATE INDEX IF NOT EXISTS idx_session_experiment ON session_traces (experiment_id)",
|
|
328
|
+
"CREATE INDEX IF NOT EXISTS idx_timestep_session_step ON session_timesteps (session_id, step_id)",
|
|
329
|
+
"CREATE INDEX IF NOT EXISTS idx_timestep_turn ON session_timesteps (turn_number)",
|
|
330
|
+
"CREATE INDEX IF NOT EXISTS idx_event_session_step ON events (session_id, timestep_id)",
|
|
331
|
+
"CREATE INDEX IF NOT EXISTS idx_event_type ON events (event_type)",
|
|
332
|
+
"CREATE INDEX IF NOT EXISTS idx_event_created ON events (created_at)",
|
|
333
|
+
"CREATE INDEX IF NOT EXISTS idx_event_model ON events (model_name)",
|
|
334
|
+
"CREATE INDEX IF NOT EXISTS idx_event_trace ON events (trace_id)",
|
|
335
|
+
"CREATE INDEX IF NOT EXISTS idx_message_session_step ON messages (session_id, timestep_id)",
|
|
336
|
+
"CREATE INDEX IF NOT EXISTS idx_message_type ON messages (message_type)",
|
|
337
|
+
"CREATE INDEX IF NOT EXISTS idx_message_timestamp ON messages (timestamp)",
|
|
338
|
+
"CREATE INDEX IF NOT EXISTS idx_experiment_created ON experiments (created_at)",
|
|
339
|
+
"CREATE INDEX IF NOT EXISTS idx_experiment_name ON experiments (name)",
|
|
340
|
+
"CREATE INDEX IF NOT EXISTS idx_system_name ON systems (name)",
|
|
341
|
+
"CREATE INDEX IF NOT EXISTS idx_system_type ON systems (system_type)",
|
|
342
|
+
"CREATE UNIQUE INDEX IF NOT EXISTS uq_system_version ON system_versions (system_id, version_number)",
|
|
343
|
+
"CREATE INDEX IF NOT EXISTS idx_version_system ON system_versions (system_id)",
|
|
344
|
+
"CREATE INDEX IF NOT EXISTS idx_version_created ON system_versions (created_at)",
|
|
345
|
+
"CREATE UNIQUE INDEX IF NOT EXISTS uq_experiment_system ON experimental_systems (experiment_id, system_id)",
|
|
346
|
+
"CREATE INDEX IF NOT EXISTS idx_experimental_system ON experimental_systems (experiment_id, system_id)",
|
|
347
|
+
"CREATE INDEX IF NOT EXISTS idx_outcome_rewards_session ON outcome_rewards (session_id)",
|
|
348
|
+
"CREATE INDEX IF NOT EXISTS idx_outcome_rewards_total ON outcome_rewards (total_reward)",
|
|
349
|
+
"CREATE INDEX IF NOT EXISTS idx_event_rewards_session ON event_rewards (session_id)",
|
|
350
|
+
"CREATE INDEX IF NOT EXISTS idx_event_rewards_event ON event_rewards (event_id)",
|
|
351
|
+
"CREATE INDEX IF NOT EXISTS idx_event_rewards_type ON event_rewards (reward_type)",
|
|
352
|
+
'CREATE INDEX IF NOT EXISTS idx_event_rewards_key ON event_rewards ("key")',
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
class NativeLibsqlTraceManager(TraceStorage):
|
|
357
|
+
"""Libsql-backed trace manager."""
|
|
358
|
+
|
|
359
|
+
def __init__(
    self,
    db_url: str | None = None,
    *,
    auth_token: str | None = None,
):
    """Set up manager state without opening a connection.

    Args:
        db_url: Database location forwarded to ``_resolve_connection_target``.
        auth_token: Optional auth token, forwarded to the same resolver and
            also kept on the instance.

    The connection itself is opened later by ``initialize()``.
    """
    # Nothing is connected yet; initialize() fills these in.
    self._initialized = False
    self._conn: LibsqlConnection | None = None
    # _conn_lock is held by initialize()/close(); _op_lock by every statement batch.
    self._conn_lock = asyncio.Lock()
    self._op_lock = asyncio.Lock()
    self._config_auth_token = auth_token
    self._target = _resolve_connection_target(db_url, auth_token)
|
|
371
|
+
|
|
372
|
+
def _open_connection(self) -> LibsqlConnection:
    """Open a libsql connection for the resolved target.

    Returns:
        Whatever ``_libsql_connect`` returns for ``self._target.database``.
    """
    target = self._target
    connect_options: dict[str, Any] = {}
    remote = target.sync_url
    # NOTE(review): nesting of the next lines was reconstructed from a
    # flattened listing - confirm against the original file.
    if remote and remote.startswith("libsql://"):
        connect_options["sync_url"] = remote
        if target.auth_token:
            connect_options["auth_token"] = target.auth_token
        # Disable automatic background sync; ReplicaSync drives this explicitly.
        connect_options.setdefault("sync_interval", 0)
    logger.debug("Opening libsql connection to %s", target.database)
    return _libsql_connect(target.database, **connect_options)
|
|
383
|
+
|
|
384
|
+
async def initialize(self):
|
|
385
|
+
"""Initialise the backend."""
|
|
386
|
+
async with self._conn_lock:
|
|
387
|
+
if self._initialized:
|
|
388
|
+
return
|
|
389
|
+
|
|
390
|
+
# Fast-fail preflight: if using remote endpoint or local sqld, check health
|
|
391
|
+
# Skip health check for plain SQLite files (sync_url is None)
|
|
392
|
+
if self._target.sync_url:
|
|
393
|
+
try:
|
|
394
|
+
parsed = urlparse(self._target.database or "")
|
|
395
|
+
# Check for local sqld: http://, https://, or libsql://
|
|
396
|
+
if parsed.scheme in ("http", "https", "libsql"):
|
|
397
|
+
host_port = parsed.netloc or ""
|
|
398
|
+
host = (host_port.split(":", 1)[0] or "").strip().lower()
|
|
399
|
+
if host in {"127.0.0.1", "localhost"} and host_port:
|
|
400
|
+
# For http:// URLs, the port should already be the HTTP API port
|
|
401
|
+
# For libsql:// URLs, we need to calculate health check port
|
|
402
|
+
if ":" in host_port:
|
|
403
|
+
port = int(host_port.split(":", 1)[1])
|
|
404
|
+
if parsed.scheme == "libsql":
|
|
405
|
+
# libsql:// uses Hrana port, health check is on HTTP API port (Hrana + 1)
|
|
406
|
+
health_url = f"http://{host}:{port + 1}/health"
|
|
407
|
+
else:
|
|
408
|
+
# http:// already points to HTTP API port
|
|
409
|
+
health_url = f"http://{host}:{port}/health"
|
|
410
|
+
else:
|
|
411
|
+
health_url = f"http://{host_port}/health"
|
|
412
|
+
try:
|
|
413
|
+
async with httpx.AsyncClient(timeout=httpx.Timeout(1.0)) as client:
|
|
414
|
+
resp = await client.get(health_url)
|
|
415
|
+
if resp.status_code != 200:
|
|
416
|
+
raise RuntimeError(
|
|
417
|
+
f"Tracing backend unhealthy at {health_url} (status={resp.status_code})"
|
|
418
|
+
)
|
|
419
|
+
except Exception as exc: # pragma: no cover - network env dependent
|
|
420
|
+
raise RuntimeError(
|
|
421
|
+
f"Tracing backend not reachable at {health_url}. "
|
|
422
|
+
f"Start sqld with both ports: sqld --db-path <path> --hrana-listen-addr {host}:HRANA_PORT --http-listen-addr {host}:HTTP_PORT "
|
|
423
|
+
f"or disable tracing (TASKAPP_TRACING_ENABLED=0)."
|
|
424
|
+
) from exc
|
|
425
|
+
except Exception:
|
|
426
|
+
# Propagate any preflight failure to abort early
|
|
427
|
+
raise
|
|
428
|
+
|
|
429
|
+
# Establish a libsql connection for future native operations.
|
|
430
|
+
self._conn = self._open_connection()
|
|
431
|
+
self._ensure_schema()
|
|
432
|
+
self._initialized = True
|
|
433
|
+
|
|
434
|
+
async def close(self):
    """Close the libsql connection if one is open and mark the manager uninitialised."""
    async with self._conn_lock:
        conn = self._conn
        if not conn:
            return
        logger.debug("Closing libsql connection to %s", self._target.database)
        conn.close()
        self._conn = None
        self._initialized = False
|
|
442
|
+
|
|
443
|
+
# ------------------------------------------------------------------
|
|
444
|
+
# Delegated operations (to be swapped with native libsql versions).
|
|
445
|
+
# ------------------------------------------------------------------
|
|
446
|
+
|
|
447
|
+
async def insert_session_trace(self, trace: SessionTrace) -> str:
    """Persist a complete session trace and return its session id.

    New session: the session row is inserted, followed by every timestep,
    event and message in ``trace``.

    Existing session: nothing is re-inserted (keeps the call idempotent);
    only ``metadata`` and the three counter columns are refreshed from
    ``trace``.

    Raises:
        Exception: Any error from inserting a message is logged and re-raised.
    """
    await self.initialize()

    import logging as _logging

    _logger = _logging.getLogger(__name__)
    messages = trace.markov_blanket_message_history
    _logger.info(
        "[TRACE_DEBUG] insert_session_trace START: session_id=%s, %d messages",
        trace.session_id,
        len(messages),
    )

    session_exists = await self._session_exists(trace.session_id)
    _logger.info("[TRACE_DEBUG] Session exists: %s", session_exists)

    # Maps the trace's step_id to the database id of its session_timesteps row.
    step_id_map: dict[str, int] = {}

    if session_exists:
        # Metadata and counts are written once by the final UPDATE below.
        _logger.warning(
            "[TRACE_DEBUG] Session %s already exists, skipping timesteps/events/messages; "
            "only refreshing metadata and counts",
            trace.session_id,
        )
    else:
        created_at = trace.created_at or datetime.now(UTC)

        async with self._op_lock:
            conn = self._conn
            assert conn is not None
            conn.execute(
                """
                INSERT INTO session_traces (
                    session_id,
                    created_at,
                    num_timesteps,
                    num_events,
                    num_messages,
                    metadata
                )
                VALUES (?, ?, 0, 0, 0, ?)
                """,
                (
                    trace.session_id,
                    created_at.isoformat(),
                    _json_dumps(trace.metadata or {}),
                ),
            )
            conn.commit()
        _logger.info("[TRACE_DEBUG] Session row inserted")

        # Only insert timesteps and events if this is a new session
        for step in trace.session_time_steps:
            step_id_map[step.step_id] = await self.ensure_timestep(
                trace.session_id,
                step_id=step.step_id,
                step_index=step.step_index,
                turn_number=step.turn_number,
                started_at=step.timestamp,
                completed_at=step.completed_at,
                metadata=step.step_metadata or {},
            )

        for event in trace.event_history:
            event_metadata = event.metadata or {}
            step_ref = event_metadata.get("step_id") if isinstance(event_metadata, dict) else None
            await self.insert_event_row(
                trace.session_id,
                timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
                event=event,
                metadata_override=event_metadata,
            )

    _logger.info(
        "[TRACE_DEBUG] insert_session_trace: saving %d messages (session_exists=%s)",
        len(messages),
        session_exists,
    )

    # Only insert messages if this is a new session (for idempotency)
    if not session_exists:
        for idx, msg in enumerate(messages):
            metadata = dict(getattr(msg, "metadata", {}) or {})
            step_ref = metadata.get("step_id")
            content_value = msg.content
            if isinstance(msg.content, SessionMessageContent):
                if msg.content.json_payload:
                    metadata.setdefault("json_payload", msg.content.json_payload)
                    content_value = msg.content.json_payload
                else:
                    content_value = msg.content.as_text()
                    if msg.content.text:
                        metadata.setdefault("text", msg.content.text)
            elif not isinstance(content_value, str):
                try:
                    content_value = json.dumps(content_value, ensure_ascii=False)
                except (TypeError, ValueError):
                    content_value = str(content_value)

            _logger.info(
                "[TRACE_DEBUG] Message %d: type=%s, content_len=%d",
                idx + 1,
                msg.message_type,
                len(str(content_value)),
            )

            try:
                await self.insert_message_row(
                    trace.session_id,
                    timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
                    message_type=msg.message_type,
                    content=content_value,
                    event_time=msg.time_record.event_time,
                    message_time=msg.time_record.message_time,
                    metadata=metadata,
                )
                _logger.info("[TRACE_DEBUG] Message %d: saved successfully", idx + 1)
            except Exception as exc:
                _logger.error(
                    "[TRACE_DEBUG] Message %d: FAILED TO SAVE: %s", idx + 1, exc, exc_info=True
                )
                raise
    else:
        _logger.info("[TRACE_DEBUG] Skipping message insertion for existing session (idempotency)")

    # Final reconciliation: counters and metadata are set from the trace itself.
    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        conn.execute(
            "UPDATE session_traces SET num_timesteps = ?, num_events = ?, num_messages = ?, metadata = ? WHERE session_id = ?",
            (
                len(trace.session_time_steps),
                len(trace.event_history),
                len(messages),
                _json_dumps(trace.metadata or {}),
                trace.session_id,
            ),
        )
        conn.commit()

    return trace.session_id
|
|
584
|
+
|
|
585
|
+
async def get_session_trace(self, session_id: str) -> dict[str, Any] | None:
|
|
586
|
+
await self.initialize()
|
|
587
|
+
|
|
588
|
+
async with self._op_lock:
|
|
589
|
+
conn = self._conn
|
|
590
|
+
assert conn is not None
|
|
591
|
+
|
|
592
|
+
session_cursor = conn.execute(
|
|
593
|
+
"""
|
|
594
|
+
SELECT session_id,
|
|
595
|
+
created_at,
|
|
596
|
+
num_timesteps,
|
|
597
|
+
num_events,
|
|
598
|
+
num_messages,
|
|
599
|
+
metadata
|
|
600
|
+
FROM session_traces
|
|
601
|
+
WHERE session_id = ?
|
|
602
|
+
""",
|
|
603
|
+
(session_id,),
|
|
604
|
+
)
|
|
605
|
+
session_row = session_cursor.fetchone()
|
|
606
|
+
session_cursor.close()
|
|
607
|
+
|
|
608
|
+
if not session_row:
|
|
609
|
+
return None
|
|
610
|
+
|
|
611
|
+
session_columns = ["session_id", "created_at", "num_timesteps", "num_events", "num_messages", "metadata"]
|
|
612
|
+
session_data = dict(zip(session_columns, session_row, strict=True))
|
|
613
|
+
|
|
614
|
+
timestep_cursor = conn.execute(
|
|
615
|
+
"""
|
|
616
|
+
SELECT step_id,
|
|
617
|
+
step_index,
|
|
618
|
+
turn_number,
|
|
619
|
+
started_at,
|
|
620
|
+
completed_at,
|
|
621
|
+
step_metadata
|
|
622
|
+
FROM session_timesteps
|
|
623
|
+
WHERE session_id = ?
|
|
624
|
+
ORDER BY step_index ASC
|
|
625
|
+
""",
|
|
626
|
+
(session_id,),
|
|
627
|
+
)
|
|
628
|
+
timestep_rows = timestep_cursor.fetchall()
|
|
629
|
+
timestep_cursor.close()
|
|
630
|
+
|
|
631
|
+
return {
|
|
632
|
+
"session_id": session_data["session_id"],
|
|
633
|
+
"created_at": _maybe_datetime(session_data["created_at"]),
|
|
634
|
+
"num_timesteps": session_data["num_timesteps"],
|
|
635
|
+
"num_events": session_data["num_events"],
|
|
636
|
+
"num_messages": session_data["num_messages"],
|
|
637
|
+
"metadata": _load_json(session_data["metadata"]),
|
|
638
|
+
"timesteps": [
|
|
639
|
+
{
|
|
640
|
+
"step_id": row[0],
|
|
641
|
+
"step_index": row[1],
|
|
642
|
+
"turn_number": row[2],
|
|
643
|
+
"started_at": _maybe_datetime(row[3]),
|
|
644
|
+
"completed_at": _maybe_datetime(row[4]),
|
|
645
|
+
"metadata": _load_json(row[5]),
|
|
646
|
+
}
|
|
647
|
+
for row in timestep_rows
|
|
648
|
+
],
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
async def _session_exists(self, session_id: str) -> bool:
|
|
652
|
+
await self.initialize()
|
|
653
|
+
async with self._op_lock:
|
|
654
|
+
conn = self._conn
|
|
655
|
+
assert conn is not None
|
|
656
|
+
cursor = conn.execute(
|
|
657
|
+
"SELECT 1 FROM session_traces WHERE session_id = ?", (session_id,)
|
|
658
|
+
)
|
|
659
|
+
row = cursor.fetchone()
|
|
660
|
+
cursor.close()
|
|
661
|
+
return row is not None
|
|
662
|
+
|
|
663
|
+
@staticmethod
|
|
664
|
+
def _normalise_params(params: dict[str, Any] | None) -> dict[str, Any]:
|
|
665
|
+
if not params:
|
|
666
|
+
return {}
|
|
667
|
+
normalised: dict[str, Any] = {}
|
|
668
|
+
for key, value in params.items():
|
|
669
|
+
if isinstance(value, datetime):
|
|
670
|
+
normalised[key] = value.isoformat()
|
|
671
|
+
else:
|
|
672
|
+
normalised[key] = value
|
|
673
|
+
return normalised
|
|
674
|
+
|
|
675
|
+
@staticmethod
|
|
676
|
+
def _prepare_query_params(query: str, params: dict[str, Any] | list[Any] | tuple[Any, ...]) -> tuple[str, tuple[Any, ...]]:
|
|
677
|
+
if isinstance(params, dict):
|
|
678
|
+
keys: list[str] = []
|
|
679
|
+
|
|
680
|
+
def _replace(match: re.Match[str]) -> str:
|
|
681
|
+
key = match.group(1)
|
|
682
|
+
keys.append(key)
|
|
683
|
+
return "?"
|
|
684
|
+
|
|
685
|
+
new_query = re.sub(r":([a-zA-Z_][a-zA-Z0-9_]*)", _replace, query)
|
|
686
|
+
if not keys:
|
|
687
|
+
raise ValueError("No named parameters found in query for provided mapping")
|
|
688
|
+
values = tuple(params[key] for key in keys)
|
|
689
|
+
return new_query, values
|
|
690
|
+
if isinstance(params, list | tuple):
|
|
691
|
+
return query, tuple(params)
|
|
692
|
+
raise TypeError("Unsupported parameter type for query execution")
|
|
693
|
+
|
|
694
|
+
def _ensure_schema(self) -> None:
|
|
695
|
+
if not self._conn:
|
|
696
|
+
raise RuntimeError("Connection not initialised")
|
|
697
|
+
|
|
698
|
+
for ddl in _TABLE_DEFINITIONS:
|
|
699
|
+
self._conn.execute(ddl)
|
|
700
|
+
for ddl in _INDEX_DEFINITIONS:
|
|
701
|
+
self._conn.execute(ddl)
|
|
702
|
+
for view_sql in analytics_views.values():
|
|
703
|
+
self._conn.execute(view_sql)
|
|
704
|
+
self._conn.commit()
|
|
705
|
+
|
|
706
|
+
async def query_traces(self, query: str, params: dict[str, Any] | None = None) -> Any:
|
|
707
|
+
await self.initialize()
|
|
708
|
+
|
|
709
|
+
async with self._op_lock:
|
|
710
|
+
conn = self._conn
|
|
711
|
+
assert conn is not None
|
|
712
|
+
normalised = self._normalise_params(params)
|
|
713
|
+
if normalised:
|
|
714
|
+
prepared_query, prepared_params = self._prepare_query_params(query, normalised)
|
|
715
|
+
cursor = conn.execute(prepared_query, prepared_params)
|
|
716
|
+
else:
|
|
717
|
+
cursor = conn.execute(query)
|
|
718
|
+
try:
|
|
719
|
+
description = cursor.description or []
|
|
720
|
+
columns = [col[0] for col in description]
|
|
721
|
+
rows = cursor.fetchall()
|
|
722
|
+
finally:
|
|
723
|
+
cursor.close()
|
|
724
|
+
|
|
725
|
+
if not rows:
|
|
726
|
+
if pd is not None:
|
|
727
|
+
return pd.DataFrame(columns=list(columns))
|
|
728
|
+
return []
|
|
729
|
+
|
|
730
|
+
records = [dict(zip(columns, row, strict=True)) for row in rows]
|
|
731
|
+
if pd is not None:
|
|
732
|
+
return pd.DataFrame(records)
|
|
733
|
+
return records
|
|
734
|
+
|
|
735
|
+
async def get_model_usage(
|
|
736
|
+
self,
|
|
737
|
+
start_date=None,
|
|
738
|
+
end_date=None,
|
|
739
|
+
model_name=None,
|
|
740
|
+
) -> Any:
|
|
741
|
+
query = """
|
|
742
|
+
SELECT * FROM model_usage_stats
|
|
743
|
+
WHERE 1=1
|
|
744
|
+
"""
|
|
745
|
+
params: dict[str, Any] = {}
|
|
746
|
+
if start_date:
|
|
747
|
+
params["start_date"] = start_date
|
|
748
|
+
query += " AND last_used >= :start_date"
|
|
749
|
+
if end_date:
|
|
750
|
+
params["end_date"] = end_date
|
|
751
|
+
query += " AND first_used <= :end_date"
|
|
752
|
+
if model_name:
|
|
753
|
+
params["model_name"] = model_name
|
|
754
|
+
query += " AND model_name = :model_name"
|
|
755
|
+
query += " ORDER BY usage_count DESC"
|
|
756
|
+
return await self.query_traces(query, params)
|
|
757
|
+
|
|
758
|
+
async def delete_session(self, session_id: str) -> bool:
|
|
759
|
+
await self.initialize()
|
|
760
|
+
|
|
761
|
+
async with self._op_lock:
|
|
762
|
+
conn = self._conn
|
|
763
|
+
assert conn is not None
|
|
764
|
+
|
|
765
|
+
cursor = conn.execute(
|
|
766
|
+
"SELECT 1 FROM session_traces WHERE session_id = ?", (session_id,)
|
|
767
|
+
)
|
|
768
|
+
exists = cursor.fetchone() is not None
|
|
769
|
+
cursor.close()
|
|
770
|
+
if not exists:
|
|
771
|
+
return False
|
|
772
|
+
|
|
773
|
+
conn.execute("DELETE FROM event_rewards WHERE session_id = ?", (session_id,))
|
|
774
|
+
conn.execute("DELETE FROM outcome_rewards WHERE session_id = ?", (session_id,))
|
|
775
|
+
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
|
776
|
+
conn.execute("DELETE FROM events WHERE session_id = ?", (session_id,))
|
|
777
|
+
conn.execute("DELETE FROM session_timesteps WHERE session_id = ?", (session_id,))
|
|
778
|
+
conn.execute("DELETE FROM session_traces WHERE session_id = ?", (session_id,))
|
|
779
|
+
conn.commit()
|
|
780
|
+
return True
|
|
781
|
+
|
|
782
|
+
# Experiment helpers -------------------------------------------------
|
|
783
|
+
async def create_experiment(
    self,
    experiment_id: str,
    name: str,
    description: str | None = None,
    configuration: dict[str, Any] | None = None,
) -> str:
    """Insert an experiment, or overwrite its fields if the id already exists.

    Returns:
        ``experiment_id``, unchanged.
    """
    await self.initialize()

    upsert_sql = """
        INSERT INTO experiments (experiment_id, name, description, configuration)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(experiment_id) DO UPDATE SET
            name = excluded.name,
            description = excluded.description,
            configuration = excluded.configuration
        """

    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        row = (experiment_id, name, description, _json_dumps(configuration or {}))
        conn.execute(upsert_sql, row)
        conn.commit()
    return experiment_id
|
|
813
|
+
|
|
814
|
+
async def link_session_to_experiment(self, session_id: str, experiment_id: str):
|
|
815
|
+
await self.initialize()
|
|
816
|
+
|
|
817
|
+
async with self._op_lock:
|
|
818
|
+
conn = self._conn
|
|
819
|
+
assert conn is not None
|
|
820
|
+
conn.execute(
|
|
821
|
+
"UPDATE session_traces SET experiment_id = ? WHERE session_id = ?",
|
|
822
|
+
(experiment_id, session_id),
|
|
823
|
+
)
|
|
824
|
+
conn.commit()
|
|
825
|
+
|
|
826
|
+
async def get_sessions_by_experiment(
|
|
827
|
+
self, experiment_id: str, limit: int | None = None
|
|
828
|
+
) -> list[dict[str, Any]]:
|
|
829
|
+
await self.initialize()
|
|
830
|
+
|
|
831
|
+
sql = """
|
|
832
|
+
SELECT session_id,
|
|
833
|
+
created_at,
|
|
834
|
+
num_timesteps,
|
|
835
|
+
num_events,
|
|
836
|
+
num_messages,
|
|
837
|
+
metadata
|
|
838
|
+
FROM session_traces
|
|
839
|
+
WHERE experiment_id = ?
|
|
840
|
+
ORDER BY created_at DESC
|
|
841
|
+
"""
|
|
842
|
+
params: list[Any] = [experiment_id]
|
|
843
|
+
if limit is not None:
|
|
844
|
+
sql += " LIMIT ?"
|
|
845
|
+
params.append(limit)
|
|
846
|
+
|
|
847
|
+
async with self._op_lock:
|
|
848
|
+
conn = self._conn
|
|
849
|
+
assert conn is not None
|
|
850
|
+
cursor = conn.execute(sql, params)
|
|
851
|
+
rows = cursor.fetchall()
|
|
852
|
+
cursor.close()
|
|
853
|
+
|
|
854
|
+
return [
|
|
855
|
+
{
|
|
856
|
+
"session_id": row[0],
|
|
857
|
+
"created_at": _maybe_datetime(row[1]),
|
|
858
|
+
"num_timesteps": row[2],
|
|
859
|
+
"num_events": row[3],
|
|
860
|
+
"num_messages": row[4],
|
|
861
|
+
"metadata": _load_json(row[5]),
|
|
862
|
+
}
|
|
863
|
+
for row in rows
|
|
864
|
+
]
|
|
865
|
+
|
|
866
|
+
async def batch_insert_sessions(
|
|
867
|
+
self, traces: list[SessionTrace], batch_size: int | None = None
|
|
868
|
+
) -> list[str]:
|
|
869
|
+
batch_size = batch_size or CONFIG.batch_size
|
|
870
|
+
inserted: list[str] = []
|
|
871
|
+
|
|
872
|
+
for i in range(0, len(traces), batch_size):
|
|
873
|
+
chunk = traces[i : i + batch_size]
|
|
874
|
+
for trace in chunk:
|
|
875
|
+
session_id = await self.insert_session_trace(trace)
|
|
876
|
+
inserted.append(session_id)
|
|
877
|
+
return inserted
|
|
878
|
+
|
|
879
|
+
# Incremental helpers -----------------------------------------------
|
|
880
|
+
async def ensure_session(
    self,
    session_id: str,
    *,
    created_at=None,
    metadata=None,
) -> None:
    """Create the ``session_traces`` row for ``session_id`` if it is missing.

    An existing row is left untouched (``ON CONFLICT ... DO NOTHING``).
    ``created_at`` defaults to the current UTC time and ``metadata`` to an
    empty mapping; counters start at zero.
    """
    await self.initialize()

    row = (
        session_id,
        (created_at or datetime.now(UTC)).isoformat(),
        _json_dumps(metadata or {}),
    )
    insert_sql = """
        INSERT INTO session_traces (
            session_id, created_at, num_timesteps, num_events, num_messages, metadata
        )
        VALUES (?, ?, 0, 0, 0, ?)
        ON CONFLICT(session_id) DO NOTHING
        """

    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        conn.execute(insert_sql, row)
        conn.commit()
|
|
907
|
+
|
|
908
|
+
async def ensure_timestep(
    self,
    session_id: str,
    *,
    step_id: str,
    step_index: int,
    turn_number: int | None = None,
    started_at=None,
    completed_at=None,
    metadata=None,
) -> int:
    """Return the database id of a timestep, inserting it when absent.

    A row matching ``(session_id, step_id)`` is returned as-is. Otherwise a
    new row is inserted with zeroed counters and the session's
    ``num_timesteps`` is incremented before committing.
    """
    await self.initialize()

    new_row = (
        session_id,
        step_id,
        step_index,
        turn_number,
        (started_at or datetime.now(UTC)).isoformat(),
        completed_at.isoformat() if completed_at else None,
        _json_dumps(metadata or {}),
    )

    async with self._op_lock:
        conn = self._conn
        assert conn is not None

        existing = conn.execute(
            """
            SELECT id FROM session_timesteps
            WHERE session_id = ? AND step_id = ?
            """,
            (session_id, step_id),
        ).fetchone()
        if existing:
            return int(existing[0])

        inserted = conn.execute(
            """
            INSERT INTO session_timesteps (
                session_id,
                step_id,
                step_index,
                turn_number,
                started_at,
                completed_at,
                num_events,
                num_messages,
                step_metadata
            )
            VALUES (?, ?, ?, ?, ?, ?, 0, 0, ?)
            """,
            new_row,
        )
        new_id = int(inserted.lastrowid)
        conn.execute(
            """
            UPDATE session_traces
            SET num_timesteps = num_timesteps + 1
            WHERE session_id = ?
            """,
            (session_id,),
        )
        conn.commit()
        return new_id
|
|
976
|
+
|
|
977
|
+
async def insert_event_row(
    self,
    session_id: str,
    *,
    timestep_db_id: int | None,
    event: Any,
    metadata_override: dict[str, Any] | None = None,
) -> int:
    """Insert one event row and bump the owning counters.

    Args:
        session_id: Session the event belongs to.
        timestep_db_id: Database id of the owning timestep, or ``None``.
        event: An ``EnvironmentEvent``, ``LMCAISEvent`` or ``RuntimeEvent``.
        metadata_override: Metadata stored instead of ``event.metadata`` when
            truthy. For ``RuntimeEvent`` the stored metadata is always
            ``event.metadata`` plus an ``"actions"`` entry.

    Returns:
        The new row's id (``lastrowid``).

    Raises:
        TypeError: ``event`` is not one of the supported event classes.
    """
    await self.initialize()

    if not isinstance(event, EnvironmentEvent | LMCAISEvent | RuntimeEvent):
        raise TypeError(f"Unsupported event type for native manager: {type(event)!r}")

    payload: dict[str, Any] = {
        "session_id": session_id,
        "timestep_id": timestep_db_id,
        "system_instance_id": event.system_instance_id,
        "event_time": event.time_record.event_time,
        "message_time": event.time_record.message_time,
        "metadata": metadata_override or event.metadata or {},
        "event_metadata": getattr(event, "event_metadata", None),
        "system_state_before": getattr(event, "system_state_before", None),
        "system_state_after": getattr(event, "system_state_after", None),
    }

    if isinstance(event, LMCAISEvent):
        call_records = None
        if getattr(event, "call_records", None):
            # Handle both dataclass instances and dicts (from deserialization)
            call_records = [
                asdict(record) if not isinstance(record, dict) else record
                for record in event.call_records
            ]
        payload.update(
            {
                "event_type": "cais",
                "model_name": event.model_name,
                "provider": event.provider,
                "input_tokens": event.input_tokens,
                "output_tokens": event.output_tokens,
                "total_tokens": event.total_tokens,
                # Stored scaled by 100 as an integer. round() rather than
                # int(): 0.29 * 100 is 28.999999999999996 in binary floating
                # point and truncation would store 28.
                "cost_usd": round(event.cost_usd * 100) if event.cost_usd is not None else None,
                "latency_ms": event.latency_ms,
                "span_id": event.span_id,
                "trace_id": event.trace_id,
                "call_records": call_records,
            }
        )
    elif isinstance(event, EnvironmentEvent):
        payload.update(
            {
                "event_type": "environment",
                "reward": event.reward,
                "terminated": event.terminated,
                "truncated": event.truncated,
            }
        )
    elif isinstance(event, RuntimeEvent):
        payload.update(
            {
                "event_type": "runtime",
                "metadata": {**(event.metadata or {}), "actions": event.actions},
            }
        )

    # Same order as the column list in the INSERT below.
    values = (
        payload["session_id"],
        payload["timestep_id"],
        payload.get("event_type"),
        payload["system_instance_id"],
        payload["event_time"],
        payload["message_time"],
        payload.get("model_name"),
        payload.get("provider"),
        payload.get("input_tokens"),
        payload.get("output_tokens"),
        payload.get("total_tokens"),
        payload.get("cost_usd"),
        payload.get("latency_ms"),
        payload.get("span_id"),
        payload.get("trace_id"),
        _json_dumps(payload.get("call_records")),
        payload.get("reward"),
        payload.get("terminated"),
        payload.get("truncated"),
        _json_dumps(payload.get("system_state_before")),
        _json_dumps(payload.get("system_state_after")),
        _json_dumps(payload.get("metadata")),
        _json_dumps(payload.get("event_metadata")),
    )

    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        cur = conn.execute(
            """
            INSERT INTO events (
                session_id,
                timestep_id,
                event_type,
                system_instance_id,
                event_time,
                message_time,
                model_name,
                provider,
                input_tokens,
                output_tokens,
                total_tokens,
                cost_usd,
                latency_ms,
                span_id,
                trace_id,
                call_records,
                reward,
                terminated,
                truncated,
                system_state_before,
                system_state_after,
                metadata,
                event_metadata
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        event_id = int(cur.lastrowid)
        conn.execute(
            """
            UPDATE session_traces
            SET num_events = num_events + 1
            WHERE session_id = ?
            """,
            (session_id,),
        )
        if timestep_db_id is not None:
            conn.execute(
                """
                UPDATE session_timesteps
                SET num_events = num_events + 1
                WHERE id = ?
                """,
                (timestep_db_id,),
            )
        conn.commit()
        return event_id
|
|
1126
|
+
|
|
1127
|
+
async def insert_message_row(
    self,
    session_id: str,
    *,
    timestep_db_id: int | None,
    message_type: str,
    content: Any,
    event_time: float | None = None,
    message_time: int | None = None,
    metadata: dict[str, Any] | None = None,
) -> int:
    """Insert one message row and bump the owning counters.

    ``content`` may be a ``SessionMessageContent`` (its JSON payload is
    preferred over its text), a string, or any other value, which is
    JSON-encoded and falls back to ``str()`` if that fails.

    Returns:
        The new row's id (``lastrowid``).
    """
    await self.initialize()

    meta = dict(metadata or {})
    if not isinstance(content, SessionMessageContent):
        stored = content
        if not isinstance(stored, str):
            try:
                stored = json.dumps(stored, ensure_ascii=False)
            except (TypeError, ValueError):
                stored = str(stored)
    elif content.json_payload:
        meta.setdefault("json_payload", content.json_payload)
        stored = content.json_payload
    else:
        stored = content.as_text()
        if content.text:
            meta.setdefault("text", content.text)

    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        inserted = conn.execute(
            """
            INSERT INTO messages (
                session_id,
                timestep_id,
                message_type,
                content,
                event_time,
                message_time,
                metadata
            )
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                session_id,
                timestep_db_id,
                message_type,
                stored,
                event_time,
                message_time,
                _json_dumps(meta),
            ),
        )
        message_id = int(inserted.lastrowid)
        conn.execute(
            """
            UPDATE session_traces
            SET num_messages = num_messages + 1
            WHERE session_id = ?
            """,
            (session_id,),
        )
        if timestep_db_id is not None:
            conn.execute(
                """
                UPDATE session_timesteps
                SET num_messages = num_messages + 1
                WHERE id = ?
                """,
                (timestep_db_id,),
            )
        conn.commit()
        return message_id
|
|
1204
|
+
|
|
1205
|
+
async def insert_outcome_reward(
    self,
    session_id: str,
    *,
    total_reward: int,
    achievements_count: int,
    total_steps: int,
    reward_metadata: dict | None = None,
) -> int:
    """Insert an ``outcome_rewards`` row stamped with the current UTC time.

    Returns:
        The new row's id (``lastrowid``).
    """
    await self.initialize()

    insert_sql = """
        INSERT INTO outcome_rewards (
            session_id,
            total_reward,
            achievements_count,
            total_steps,
            created_at,
            reward_metadata
        )
        VALUES (?, ?, ?, ?, ?, ?)
        """

    async with self._op_lock:
        conn = self._conn
        assert conn is not None
        row = (
            session_id,
            total_reward,
            achievements_count,
            total_steps,
            datetime.now(UTC).isoformat(),
            _json_dumps(reward_metadata),
        )
        inserted = conn.execute(insert_sql, row)
        conn.commit()
        reward_id = int(inserted.lastrowid)
    return reward_id
|
|
1243
|
+
|
|
1244
|
+
async def insert_event_reward(
    self,
    session_id: str,
    *,
    event_id: int,
    message_id: int | None = None,
    turn_number: int | None = None,
    reward_value: float = 0.0,
    reward_type: str | None = None,
    key: str | None = None,
    annotation: dict[str, Any] | None = None,
    source: str | None = None,
) -> int:
    """Insert one row into ``event_rewards`` and return its row id.

    Awaits ``self.initialize()`` first, then performs the insert and the
    commit while holding ``self._op_lock``.

    Args:
        session_id: Value stored in the ``session_id`` column.
        event_id: Value stored in the ``event_id`` column.
        message_id: Optional value stored in the ``message_id`` column.
        turn_number: Optional value stored in the ``turn_number`` column.
        reward_value: Value stored in the ``reward_value`` column.
        reward_type: Optional value stored in the ``reward_type`` column.
        key: Optional value stored in the ``key`` column.
        annotation: Optional mapping; it is passed through ``_json_dumps``
            and the result is stored in ``annotation``.
        source: Optional value stored in the ``source`` column.

    Returns:
        The cursor's ``lastrowid`` for the insert, converted to ``int``.

    Raises:
        Exception: Any error raised by the connection while executing or
            committing is re-raised after the pending transaction has been
            rolled back.
    """
    await self.initialize()

    insert_sql = """
        INSERT INTO event_rewards (
            event_id,
            session_id,
            message_id,
            turn_number,
            reward_value,
            reward_type,
            key,
            annotation,
            source,
            created_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

    async with self._op_lock:
        conn = self._conn
        # NOTE(review): relies on initialize() having set self._conn — confirm.
        assert conn is not None

        # Column order here must match the column list in insert_sql.
        row = (
            event_id,
            session_id,
            message_id,
            turn_number,
            reward_value,
            reward_type,
            key,
            _json_dumps(annotation),
            source,
            datetime.now(UTC).isoformat(),
        )
        try:
            cur = conn.execute(insert_sql, row)
            conn.commit()
        except Exception:
            # Do not leave a half-finished transaction open on the shared
            # connection; otherwise the next operation's commit would
            # inherit it. The original error still propagates.
            conn.rollback()
            raise
        return int(cur.lastrowid)
|