synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +56 -26
- examples/swe/task_app/hosted/rollout.py +42 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +799 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/models/supported.py +1 -0
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +48 -59
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +1922 -190
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +9 -9
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +24 -5
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +138 -39
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +56 -0
- synth_ai/task/rubrics/loaders.py +152 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +413 -6
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +66 -43
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@ import logging
|
|
|
13
13
|
import re
|
|
14
14
|
from collections.abc import Callable
|
|
15
15
|
from dataclasses import asdict, dataclass
|
|
16
|
-
from datetime import
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
17
|
from typing import TYPE_CHECKING, Any, cast
|
|
18
18
|
|
|
19
19
|
import libsql
|
|
@@ -117,7 +117,7 @@ def _maybe_datetime(value: Any) -> Any:
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
def _load_json(value: Any) -> Any:
|
|
120
|
-
if value is None or isinstance(value,
|
|
120
|
+
if value is None or isinstance(value, dict | list):
|
|
121
121
|
return value or {}
|
|
122
122
|
if isinstance(value, str):
|
|
123
123
|
try:
|
|
@@ -370,8 +370,18 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
370
370
|
|
|
371
371
|
async def insert_session_trace(self, trace: SessionTrace) -> str:
|
|
372
372
|
await self.initialize()
|
|
373
|
-
|
|
374
|
-
|
|
373
|
+
|
|
374
|
+
import logging as _logging
|
|
375
|
+
_logger = _logging.getLogger(__name__)
|
|
376
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace START: session_id={trace.session_id}, {len(trace.markov_blanket_message_history)} messages")
|
|
377
|
+
|
|
378
|
+
session_exists = await self._session_exists(trace.session_id)
|
|
379
|
+
_logger.info(f"[TRACE_DEBUG] Session exists: {session_exists}")
|
|
380
|
+
|
|
381
|
+
if session_exists:
|
|
382
|
+
_logger.warning(f"[TRACE_DEBUG] Session {trace.session_id} already exists, need to save messages anyway!")
|
|
383
|
+
# Don't return early - we need to save messages!
|
|
384
|
+
# Just update metadata
|
|
375
385
|
async with self._op_lock:
|
|
376
386
|
conn = self._conn
|
|
377
387
|
assert conn is not None
|
|
@@ -380,32 +390,34 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
380
390
|
(_json_dumps(trace.metadata or {}), trace.session_id),
|
|
381
391
|
)
|
|
382
392
|
conn.commit()
|
|
383
|
-
|
|
393
|
+
# Continue to save messages instead of returning
|
|
384
394
|
|
|
385
|
-
|
|
395
|
+
if not session_exists:
|
|
396
|
+
created_at = trace.created_at or datetime.now(timezone.utc)
|
|
386
397
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
398
|
+
async with self._op_lock:
|
|
399
|
+
conn = self._conn
|
|
400
|
+
assert conn is not None
|
|
401
|
+
conn.execute(
|
|
402
|
+
"""
|
|
403
|
+
INSERT INTO session_traces (
|
|
404
|
+
session_id,
|
|
405
|
+
created_at,
|
|
406
|
+
num_timesteps,
|
|
407
|
+
num_events,
|
|
408
|
+
num_messages,
|
|
409
|
+
metadata
|
|
410
|
+
)
|
|
411
|
+
VALUES (?, ?, 0, 0, 0, ?)
|
|
412
|
+
""",
|
|
413
|
+
(
|
|
414
|
+
trace.session_id,
|
|
415
|
+
created_at.isoformat(),
|
|
416
|
+
_json_dumps(trace.metadata or {}),
|
|
417
|
+
),
|
|
399
418
|
)
|
|
400
|
-
|
|
401
|
-
""
|
|
402
|
-
(
|
|
403
|
-
trace.session_id,
|
|
404
|
-
created_at.isoformat(),
|
|
405
|
-
_json_dumps(trace.metadata or {}),
|
|
406
|
-
),
|
|
407
|
-
)
|
|
408
|
-
conn.commit()
|
|
419
|
+
conn.commit()
|
|
420
|
+
_logger.info(f"[TRACE_DEBUG] Session row inserted")
|
|
409
421
|
|
|
410
422
|
step_id_map: dict[str, int] = {}
|
|
411
423
|
|
|
@@ -434,7 +446,11 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
434
446
|
metadata_override=event.metadata or {},
|
|
435
447
|
)
|
|
436
448
|
|
|
437
|
-
|
|
449
|
+
import logging as _logging
|
|
450
|
+
_logger = _logging.getLogger(__name__)
|
|
451
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace: saving {len(trace.markov_blanket_message_history)} messages")
|
|
452
|
+
|
|
453
|
+
for idx, msg in enumerate(trace.markov_blanket_message_history):
|
|
438
454
|
metadata = dict(getattr(msg, "metadata", {}) or {})
|
|
439
455
|
step_ref = metadata.get("step_id")
|
|
440
456
|
content_value = msg.content
|
|
@@ -452,15 +468,22 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
452
468
|
except (TypeError, ValueError):
|
|
453
469
|
content_value = str(content_value)
|
|
454
470
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
471
|
+
_logger.info(f"[TRACE_DEBUG] Message {idx+1}: type={msg.message_type}, content_len={len(str(content_value))}")
|
|
472
|
+
|
|
473
|
+
try:
|
|
474
|
+
await self.insert_message_row(
|
|
475
|
+
trace.session_id,
|
|
476
|
+
timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
|
|
477
|
+
message_type=msg.message_type,
|
|
478
|
+
content=content_value,
|
|
479
|
+
event_time=msg.time_record.event_time,
|
|
480
|
+
message_time=msg.time_record.message_time,
|
|
481
|
+
metadata=metadata,
|
|
482
|
+
)
|
|
483
|
+
_logger.info(f"[TRACE_DEBUG] Message {idx+1}: saved successfully")
|
|
484
|
+
except Exception as exc:
|
|
485
|
+
_logger.error(f"[TRACE_DEBUG] Message {idx+1}: FAILED TO SAVE: {exc}", exc_info=True)
|
|
486
|
+
raise
|
|
464
487
|
|
|
465
488
|
async with self._op_lock:
|
|
466
489
|
conn = self._conn
|
|
@@ -584,7 +607,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
584
607
|
raise ValueError("No named parameters found in query for provided mapping")
|
|
585
608
|
values = tuple(params[key] for key in keys)
|
|
586
609
|
return new_query, values
|
|
587
|
-
if isinstance(params,
|
|
610
|
+
if isinstance(params, list | tuple):
|
|
588
611
|
return query, tuple(params)
|
|
589
612
|
raise TypeError("Unsupported parameter type for query execution")
|
|
590
613
|
|
|
@@ -783,7 +806,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
783
806
|
) -> None:
|
|
784
807
|
await self.initialize()
|
|
785
808
|
|
|
786
|
-
created_at_val = (created_at or datetime.now(
|
|
809
|
+
created_at_val = (created_at or datetime.now(timezone.utc)).isoformat()
|
|
787
810
|
metadata_json = _json_dumps(metadata or {})
|
|
788
811
|
|
|
789
812
|
async with self._op_lock:
|
|
@@ -815,7 +838,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
815
838
|
) -> int:
|
|
816
839
|
await self.initialize()
|
|
817
840
|
|
|
818
|
-
started_at_val = (started_at or datetime.now(
|
|
841
|
+
started_at_val = (started_at or datetime.now(timezone.utc)).isoformat()
|
|
819
842
|
completed_at_val = completed_at.isoformat() if completed_at else None
|
|
820
843
|
metadata_json = _json_dumps(metadata or {})
|
|
821
844
|
|
|
@@ -881,7 +904,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
881
904
|
) -> int:
|
|
882
905
|
await self.initialize()
|
|
883
906
|
|
|
884
|
-
if not isinstance(event,
|
|
907
|
+
if not isinstance(event, EnvironmentEvent | LMCAISEvent | RuntimeEvent):
|
|
885
908
|
raise TypeError(f"Unsupported event type for native manager: {type(event)!r}")
|
|
886
909
|
|
|
887
910
|
metadata_json = metadata_override or event.metadata or {}
|
|
@@ -1127,7 +1150,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
1127
1150
|
total_reward,
|
|
1128
1151
|
achievements_count,
|
|
1129
1152
|
total_steps,
|
|
1130
|
-
datetime.now(
|
|
1153
|
+
datetime.now(timezone.utc).isoformat(),
|
|
1131
1154
|
_json_dumps(reward_metadata),
|
|
1132
1155
|
),
|
|
1133
1156
|
)
|
|
@@ -1179,7 +1202,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
1179
1202
|
key,
|
|
1180
1203
|
_json_dumps(annotation),
|
|
1181
1204
|
source,
|
|
1182
|
-
datetime.now(
|
|
1205
|
+
datetime.now(timezone.utc).isoformat(),
|
|
1183
1206
|
),
|
|
1184
1207
|
)
|
|
1185
1208
|
conn.commit()
|
synth_ai/tracing_v3/utils.py
CHANGED
|
@@ -5,13 +5,13 @@ from __future__ import annotations
|
|
|
5
5
|
import hashlib
|
|
6
6
|
import json
|
|
7
7
|
import uuid
|
|
8
|
-
from datetime import
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
9
|
from typing import Any
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def iso_now() -> str:
|
|
13
|
-
"""Get current
|
|
14
|
-
return datetime.now(
|
|
13
|
+
"""Get current timezone.utc time as ISO format string."""
|
|
14
|
+
return datetime.now(timezone.utc).isoformat()
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def json_dumps(obj: Any) -> str:
|
synth_ai/tui/__init__.py
ADDED
synth_ai/tui/__main__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Command Line Interface tools for synth-ai."""
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Query experiments and sessions from Turso/sqld using v3 tracing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import asyncio
|
|
8
|
+
|
|
9
|
+
from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def list_experiments(db_url: str):
|
|
13
|
+
"""List all experiments in the database."""
|
|
14
|
+
db = AsyncSQLTraceManager(db_url)
|
|
15
|
+
await db.initialize()
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
df = await db.query_traces("""
|
|
19
|
+
SELECT
|
|
20
|
+
e.experiment_id,
|
|
21
|
+
e.name,
|
|
22
|
+
e.description,
|
|
23
|
+
e.created_at,
|
|
24
|
+
COUNT(DISTINCT st.session_id) as num_sessions,
|
|
25
|
+
COUNT(DISTINCT ev.id) as num_events,
|
|
26
|
+
SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
|
|
27
|
+
SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
|
|
28
|
+
FROM experiments e
|
|
29
|
+
LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
|
|
30
|
+
LEFT JOIN events ev ON st.session_id = ev.session_id
|
|
31
|
+
GROUP BY e.experiment_id, e.name, e.description, e.created_at
|
|
32
|
+
ORDER BY e.created_at DESC
|
|
33
|
+
""")
|
|
34
|
+
|
|
35
|
+
if df.empty:
|
|
36
|
+
print("No experiments found in database.")
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
print(f"\n{'=' * 100}")
|
|
40
|
+
print(f"{'Experiments in ' + db_url:^100}")
|
|
41
|
+
print(f"{'=' * 100}\n")
|
|
42
|
+
|
|
43
|
+
for _, row in df.iterrows():
|
|
44
|
+
print(f"🧪 {row['name']} (id: {row['experiment_id'][:8]}...)")
|
|
45
|
+
print(f" Created: {row['created_at']}")
|
|
46
|
+
print(f" Description: {row['description']}")
|
|
47
|
+
print(f" Sessions: {row['num_sessions']}")
|
|
48
|
+
print(f" Events: {row['num_events']:,}")
|
|
49
|
+
if row["total_cost"] and row["total_cost"] > 0:
|
|
50
|
+
print(f" Cost: ${row['total_cost']:.4f}")
|
|
51
|
+
if row["total_tokens"] and row["total_tokens"] > 0:
|
|
52
|
+
print(f" Tokens: {int(row['total_tokens']):,}")
|
|
53
|
+
print()
|
|
54
|
+
finally:
|
|
55
|
+
await db.close()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def show_experiment_details(db_url: str, experiment_id: str):
|
|
59
|
+
"""Show detailed information about a specific experiment."""
|
|
60
|
+
db = AsyncSQLTraceManager(db_url)
|
|
61
|
+
await db.initialize()
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
# Get experiment info
|
|
65
|
+
exp_df = await db.query_traces(
|
|
66
|
+
"""
|
|
67
|
+
SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
|
|
68
|
+
""",
|
|
69
|
+
{"exp_id": f"{experiment_id}%"},
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
if exp_df.empty:
|
|
73
|
+
print(f"No experiment found matching ID: {experiment_id}")
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
exp = exp_df.iloc[0]
|
|
77
|
+
print(f"\n{'=' * 100}")
|
|
78
|
+
print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
|
|
79
|
+
print(f"{'=' * 100}\n")
|
|
80
|
+
|
|
81
|
+
# Get session statistics
|
|
82
|
+
sessions_df = await db.get_sessions_by_experiment(exp["experiment_id"])
|
|
83
|
+
|
|
84
|
+
if sessions_df:
|
|
85
|
+
print(f"Sessions: {len(sessions_df)}")
|
|
86
|
+
|
|
87
|
+
# Get aggregated stats
|
|
88
|
+
stats_df = await db.query_traces(
|
|
89
|
+
"""
|
|
90
|
+
SELECT
|
|
91
|
+
COUNT(DISTINCT ev.id) as total_events,
|
|
92
|
+
COUNT(DISTINCT m.id) as total_messages,
|
|
93
|
+
SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
|
|
94
|
+
SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
|
|
95
|
+
FROM session_traces st
|
|
96
|
+
LEFT JOIN events ev ON st.session_id = ev.session_id
|
|
97
|
+
LEFT JOIN messages m ON st.session_id = m.session_id
|
|
98
|
+
WHERE st.experiment_id = :exp_id
|
|
99
|
+
""",
|
|
100
|
+
{"exp_id": exp["experiment_id"]},
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
if not stats_df.empty:
|
|
104
|
+
stats = stats_df.iloc[0]
|
|
105
|
+
print(f"Total events: {int(stats['total_events']):,}")
|
|
106
|
+
print(f"Total messages: {int(stats['total_messages']):,}")
|
|
107
|
+
print(f"Total cost: ${stats['total_cost']:.4f}")
|
|
108
|
+
print(f"Total tokens: {int(stats['total_tokens']):,}")
|
|
109
|
+
|
|
110
|
+
# Show session list
|
|
111
|
+
print("\nSession list:")
|
|
112
|
+
for sess in sessions_df:
|
|
113
|
+
print(f" - {sess['session_id']} ({sess['created_at']})")
|
|
114
|
+
print(
|
|
115
|
+
f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
|
|
116
|
+
)
|
|
117
|
+
finally:
|
|
118
|
+
await db.close()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def show_model_usage(db_url: str, model_name: str | None = None):
    """Print the model-usage table read from the trace database.

    Args:
        db_url: Connection URL handed to ``AsyncSQLTraceManager``.
        model_name: Forwarded unchanged to ``get_model_usage``; defaults to
            ``None``.
    """
    manager = AsyncSQLTraceManager(db_url)
    await manager.initialize()

    try:
        usage = await manager.get_model_usage(model_name=model_name)

        if not usage.empty:
            rule = "=" * 100
            print(f"\n{rule}")
            print(f"{'Model Usage Statistics':^100}")
            print(f"{rule}\n")

            print(usage.to_string(index=False))
        else:
            print("No model usage data found.")
    finally:
        # Always release the connection, including on the empty-result path.
        await manager.close()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def main():
    """Parse the command-line flags and run the matching report.

    ``--usage`` or ``--model`` wins and prints model usage; otherwise
    ``--experiment`` prints one experiment's details; with neither flag every
    experiment is listed.
    """
    cli = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
    cli.add_argument(
        "-u", "--url", default="sqlite+libsql://http://127.0.0.1:8080", help="Turso database URL"
    )
    cli.add_argument(
        "-e", "--experiment", help="Show details for specific experiment ID (can be partial)"
    )
    cli.add_argument("-m", "--model", help="Show usage for specific model")
    cli.add_argument("--usage", action="store_true", help="Show model usage statistics")

    opts = cli.parse_args()

    # Guard-clause dispatch: the first matching mode runs and returns.
    if opts.usage or opts.model:
        await show_model_usage(opts.url, opts.model)
        return
    if opts.experiment:
        await show_experiment_details(opts.url, opts.experiment)
        return
    await list_experiments(opts.url)


# Script entry point: run the async CLI to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Query experiments and sessions from Turso/sqld using v3 tracing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import asyncio
|
|
8
|
+
|
|
9
|
+
from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def list_experiments(db_url: str):
    """Print a one-paragraph summary of every experiment in the database.

    Each experiment is shown with its creation time, description, session and
    event counts, and (when greater than zero) the cost and token totals
    summed over its ``'cais'`` events.

    Args:
        db_url: Connection URL handed to ``AsyncSQLTraceManager``; also echoed
            in the report banner.
    """
    manager = AsyncSQLTraceManager(db_url)
    await manager.initialize()

    try:
        experiments = await manager.query_traces("""
            SELECT
                e.experiment_id,
                e.name,
                e.description,
                e.created_at,
                COUNT(DISTINCT st.session_id) as num_sessions,
                COUNT(DISTINCT ev.id) as num_events,
                SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
                SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
            FROM experiments e
            LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
            LEFT JOIN events ev ON st.session_id = ev.session_id
            GROUP BY e.experiment_id, e.name, e.description, e.created_at
            ORDER BY e.created_at DESC
        """)

        if experiments.empty:
            print("No experiments found in database.")
            return

        rule = "=" * 100
        print(f"\n{rule}")
        print(f"{'Experiments in ' + db_url:^100}")
        print(f"{rule}\n")

        for _, record in experiments.iterrows():
            print(f"🧪 {record['name']} (id: {record['experiment_id'][:8]}...)")
            print(f" Created: {record['created_at']}")
            print(f" Description: {record['description']}")
            print(f" Sessions: {record['num_sessions']}")
            print(f" Events: {record['num_events']:,}")

            # Cost and token lines are omitted when the total is missing or zero.
            cost = record["total_cost"]
            if cost and cost > 0:
                print(f" Cost: ${cost:.4f}")
            tokens = record["total_tokens"]
            if tokens and tokens > 0:
                print(f" Tokens: {int(tokens):,}")
            print()
    finally:
        await manager.close()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def show_experiment_details(db_url: str, experiment_id: str):
    """Print details, aggregate statistics and the session list for one experiment.

    Args:
        db_url: Connection URL handed to ``AsyncSQLTraceManager``.
        experiment_id: Full experiment ID or a leading fragment of it. It is
            matched with ``LIKE '<experiment_id>%'`` and the first matching
            row is the one reported.
    """
    db = AsyncSQLTraceManager(db_url)
    await db.initialize()

    try:
        # Get experiment info (prefix match so partial IDs work)
        exp_df = await db.query_traces(
            """
            SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
            """,
            {"exp_id": f"{experiment_id}%"},
        )

        if exp_df.empty:
            print(f"No experiment found matching ID: {experiment_id}")
            return

        exp = exp_df.iloc[0]
        print(f"\n{'=' * 100}")
        print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
        print(f"{'=' * 100}\n")

        # Get session statistics. The result is truth-tested, measured with
        # len() and iterated as records below, so it is treated as a sequence
        # of mapping-like rows rather than a DataFrame.
        sessions = await db.get_sessions_by_experiment(exp["experiment_id"])

        if sessions:
            print(f"Sessions: {len(sessions)}")

            # Get aggregated stats.
            # Events and messages are aggregated in separate derived tables:
            # joining both onto session_traces in one SELECT yields an
            # events x messages product per session, which inflates the
            # cost/token SUMs by the number of messages. COALESCE keeps the
            # totals numeric when every contributing value is NULL.
            stats_df = await db.query_traces(
                """
                SELECT
                    ev_stats.total_events,
                    msg_stats.total_messages,
                    ev_stats.total_cost,
                    ev_stats.total_tokens
                FROM (
                    SELECT
                        COUNT(DISTINCT ev.id) as total_events,
                        COALESCE(SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END), 0) / 100.0 as total_cost,
                        COALESCE(SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END), 0) as total_tokens
                    FROM session_traces st
                    LEFT JOIN events ev ON st.session_id = ev.session_id
                    WHERE st.experiment_id = :exp_id
                ) ev_stats
                CROSS JOIN (
                    SELECT COUNT(DISTINCT m.id) as total_messages
                    FROM session_traces st
                    LEFT JOIN messages m ON st.session_id = m.session_id
                    WHERE st.experiment_id = :exp_id
                ) msg_stats
                """,
                {"exp_id": exp["experiment_id"]},
            )

            if not stats_df.empty:
                stats = stats_df.iloc[0]
                print(f"Total events: {int(stats['total_events']):,}")
                print(f"Total messages: {int(stats['total_messages']):,}")
                print(f"Total cost: ${stats['total_cost']:.4f}")
                print(f"Total tokens: {int(stats['total_tokens']):,}")

            # Show session list
            print("\nSession list:")
            for sess in sessions:
                print(f" - {sess['session_id']} ({sess['created_at']})")
                print(
                    f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
                )
    finally:
        # Always release the connection, including on the early-return path.
        await db.close()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def show_model_usage(db_url: str, model_name: str | None = None):
    """Print the model-usage table read from the trace database.

    Args:
        db_url: Connection URL handed to ``AsyncSQLTraceManager``.
        model_name: Forwarded unchanged to ``get_model_usage``; defaults to
            ``None``.
    """
    manager = AsyncSQLTraceManager(db_url)
    await manager.initialize()

    try:
        usage = await manager.get_model_usage(model_name=model_name)

        if not usage.empty:
            rule = "=" * 100
            print(f"\n{rule}")
            print(f"{'Model Usage Statistics':^100}")
            print(f"{rule}\n")

            print(usage.to_string(index=False))
        else:
            print("No model usage data found.")
    finally:
        # Always release the connection, including on the empty-result path.
        await manager.close()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def main():
    """Parse the command-line flags and run the matching report.

    ``--usage`` or ``--model`` wins and prints model usage; otherwise
    ``--experiment`` prints one experiment's details; with neither flag every
    experiment is listed.
    """
    cli = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
    cli.add_argument(
        "-u", "--url", default="sqlite+libsql://http://127.0.0.1:8080", help="Turso database URL"
    )
    cli.add_argument(
        "-e", "--experiment", help="Show details for specific experiment ID (can be partial)"
    )
    cli.add_argument("-m", "--model", help="Show usage for specific model")
    cli.add_argument("--usage", action="store_true", help="Show model usage statistics")

    opts = cli.parse_args()

    # Guard-clause dispatch: the first matching mode runs and returns.
    if opts.usage or opts.model:
        await show_model_usage(opts.url, opts.model)
        return
    if opts.experiment:
        await show_experiment_details(opts.url, opts.experiment)
        return
    await list_experiments(opts.url)


# Script entry point: run the async CLI to completion.
if __name__ == "__main__":
    asyncio.run(main())
|