synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai has been flagged as potentially problematic; consult the package registry's advisory page for details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +56 -26
- examples/swe/task_app/hosted/rollout.py +42 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +799 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/models/supported.py +1 -0
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +48 -59
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +1922 -190
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +9 -9
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +24 -5
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +138 -39
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +56 -0
- synth_ai/task/rubrics/loaders.py +152 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +413 -6
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +66 -43
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
LLM Logger utility for logging all VLM interactions
|
|
4
|
+
|
|
5
|
+
This module provides a centralized logging system for all LLM interactions,
|
|
6
|
+
including input prompts, responses, and metadata. Logs are saved to dated
|
|
7
|
+
files in the llm_logs directory.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import json
|
|
12
|
+
import time
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Dict, Any, Optional
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
class LLMLogger:
|
|
20
|
+
"""Logger for all LLM interactions"""
|
|
21
|
+
|
|
22
|
+
def __init__(self, log_dir: str = "llm_logs"):
|
|
23
|
+
"""Initialize the LLM logger
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
log_dir: Directory to store log files
|
|
27
|
+
"""
|
|
28
|
+
self.log_dir = log_dir
|
|
29
|
+
self.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
30
|
+
self.log_file = os.path.join(log_dir, f"llm_log_{self.session_id}.jsonl")
|
|
31
|
+
|
|
32
|
+
# Ensure log directory exists
|
|
33
|
+
os.makedirs(log_dir, exist_ok=True)
|
|
34
|
+
|
|
35
|
+
# Initialize cumulative metrics
|
|
36
|
+
self.cumulative_metrics = {
|
|
37
|
+
"total_tokens": 0,
|
|
38
|
+
"prompt_tokens": 0,
|
|
39
|
+
"completion_tokens": 0,
|
|
40
|
+
"total_cost": 0.0,
|
|
41
|
+
"total_actions": 0,
|
|
42
|
+
"start_time": time.time(),
|
|
43
|
+
"total_llm_calls": 0
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
# Model pricing (per 1K tokens) - can be updated based on actual pricing
|
|
47
|
+
self.pricing = {
|
|
48
|
+
"gpt-4o": {"prompt": 0.01, "completion": 0.03},
|
|
49
|
+
"gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
|
|
50
|
+
"o3-mini": {"prompt": 0.0012, "completion": 0.0048},
|
|
51
|
+
"gemini-2.5-flash": {"prompt": 0.000315, "completion": 0.00126},
|
|
52
|
+
"gemini-2.5-pro": {"prompt": 0.00125, "completion": 0.005},
|
|
53
|
+
"default": {"prompt": 0.001, "completion": 0.002} # Default pricing
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
# Initialize log file with session info
|
|
57
|
+
self._log_session_start()
|
|
58
|
+
|
|
59
|
+
logger.info(f"LLM Logger initialized: {self.log_file}")
|
|
60
|
+
|
|
61
|
+
def _log_session_start(self):
|
|
62
|
+
"""Log session start information"""
|
|
63
|
+
session_info = {
|
|
64
|
+
"timestamp": datetime.now().isoformat(),
|
|
65
|
+
"type": "session_start",
|
|
66
|
+
"session_id": self.session_id,
|
|
67
|
+
"log_file": self.log_file
|
|
68
|
+
}
|
|
69
|
+
self._write_log_entry(session_info)
|
|
70
|
+
|
|
71
|
+
def log_interaction(self,
|
|
72
|
+
interaction_type: str,
|
|
73
|
+
prompt: str,
|
|
74
|
+
response: str,
|
|
75
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
76
|
+
duration: Optional[float] = None,
|
|
77
|
+
model_info: Optional[Dict[str, Any]] = None):
|
|
78
|
+
"""Log a complete LLM interaction
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
interaction_type: Type of interaction (e.g., "perception", "planning", "action")
|
|
82
|
+
prompt: The input prompt sent to the LLM
|
|
83
|
+
response: The response received from the LLM
|
|
84
|
+
metadata: Additional metadata about the interaction
|
|
85
|
+
duration: Time taken for the interaction in seconds
|
|
86
|
+
model_info: Information about the model used
|
|
87
|
+
"""
|
|
88
|
+
log_entry = {
|
|
89
|
+
"timestamp": datetime.now().isoformat(),
|
|
90
|
+
"type": "interaction",
|
|
91
|
+
"interaction_type": interaction_type,
|
|
92
|
+
"prompt": prompt,
|
|
93
|
+
"response": response,
|
|
94
|
+
"duration": duration,
|
|
95
|
+
"metadata": metadata or {},
|
|
96
|
+
"model_info": model_info or {}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
self._write_log_entry(log_entry)
|
|
100
|
+
|
|
101
|
+
# Update cumulative metrics
|
|
102
|
+
self.cumulative_metrics["total_llm_calls"] += 1
|
|
103
|
+
|
|
104
|
+
# Track token usage if available
|
|
105
|
+
if metadata and "token_usage" in metadata:
|
|
106
|
+
token_usage = metadata["token_usage"]
|
|
107
|
+
if token_usage:
|
|
108
|
+
self.cumulative_metrics["total_tokens"] += token_usage.get("total_tokens", 0)
|
|
109
|
+
self.cumulative_metrics["prompt_tokens"] += token_usage.get("prompt_tokens", 0)
|
|
110
|
+
self.cumulative_metrics["completion_tokens"] += token_usage.get("completion_tokens", 0)
|
|
111
|
+
|
|
112
|
+
# Calculate cost based on model
|
|
113
|
+
model_name = model_info.get("model", "") if model_info else ""
|
|
114
|
+
pricing = self.pricing.get("default")
|
|
115
|
+
for key in self.pricing:
|
|
116
|
+
if key in model_name.lower():
|
|
117
|
+
pricing = self.pricing[key]
|
|
118
|
+
break
|
|
119
|
+
|
|
120
|
+
prompt_cost = (token_usage.get("prompt_tokens", 0) / 1000) * pricing["prompt"]
|
|
121
|
+
completion_cost = (token_usage.get("completion_tokens", 0) / 1000) * pricing["completion"]
|
|
122
|
+
self.cumulative_metrics["total_cost"] += prompt_cost + completion_cost
|
|
123
|
+
|
|
124
|
+
# Track actions if this is an action interaction
|
|
125
|
+
if "action" in interaction_type.lower():
|
|
126
|
+
# Count actions in response - look for valid button presses
|
|
127
|
+
# Response could be single button like "A" or multiple like "A A B" or with commas
|
|
128
|
+
valid_buttons = ['A', 'B', 'SELECT', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'L', 'R']
|
|
129
|
+
|
|
130
|
+
# Convert response to uppercase and split by spaces or commas
|
|
131
|
+
response_upper = response.upper()
|
|
132
|
+
tokens = response_upper.replace(',', ' ').split()
|
|
133
|
+
|
|
134
|
+
# Count each valid button found
|
|
135
|
+
action_count = sum(1 for token in tokens if token in valid_buttons)
|
|
136
|
+
|
|
137
|
+
# If no actions found but response contains button names, count them
|
|
138
|
+
if action_count == 0:
|
|
139
|
+
# Also check for arrow notations
|
|
140
|
+
action_count += response_upper.count('UP')
|
|
141
|
+
action_count += response_upper.count('DOWN')
|
|
142
|
+
action_count += response_upper.count('LEFT')
|
|
143
|
+
action_count += response_upper.count('RIGHT')
|
|
144
|
+
action_count += response.count('↑')
|
|
145
|
+
action_count += response.count('↓')
|
|
146
|
+
action_count += response.count('←')
|
|
147
|
+
action_count += response.count('→')
|
|
148
|
+
# Count single letter buttons
|
|
149
|
+
for char in 'ABLR':
|
|
150
|
+
if char in response_upper:
|
|
151
|
+
action_count += response_upper.count(char)
|
|
152
|
+
|
|
153
|
+
if action_count > 0:
|
|
154
|
+
self.cumulative_metrics["total_actions"] += action_count
|
|
155
|
+
logger.debug(f"Counted {action_count} actions in response: {response[:50]}")
|
|
156
|
+
|
|
157
|
+
# Also log to console for debugging
|
|
158
|
+
logger.info(f"LLM {interaction_type.upper()}: {duration:.2f}s")
|
|
159
|
+
if duration:
|
|
160
|
+
logger.debug(f"Prompt length: {len(prompt)} chars, Response length: {len(response)} chars")
|
|
161
|
+
|
|
162
|
+
def log_error(self,
|
|
163
|
+
interaction_type: str,
|
|
164
|
+
prompt: str,
|
|
165
|
+
error: str,
|
|
166
|
+
metadata: Optional[Dict[str, Any]] = None):
|
|
167
|
+
"""Log an LLM interaction error
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
interaction_type: Type of interaction that failed
|
|
171
|
+
prompt: The input prompt that was sent
|
|
172
|
+
error: The error message
|
|
173
|
+
metadata: Additional metadata about the error
|
|
174
|
+
"""
|
|
175
|
+
log_entry = {
|
|
176
|
+
"timestamp": datetime.now().isoformat(),
|
|
177
|
+
"type": "error",
|
|
178
|
+
"interaction_type": interaction_type,
|
|
179
|
+
"prompt": prompt,
|
|
180
|
+
"error": error,
|
|
181
|
+
"metadata": metadata or {}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
self._write_log_entry(log_entry)
|
|
185
|
+
logger.error(f"LLM {interaction_type.upper()} ERROR: {error}")
|
|
186
|
+
|
|
187
|
+
def log_step_start(self, step: int, step_type: str = "agent_step"):
|
|
188
|
+
"""Log the start of an agent step
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
step: Step number
|
|
192
|
+
step_type: Type of step (e.g., "agent_step", "perception", "planning")
|
|
193
|
+
"""
|
|
194
|
+
log_entry = {
|
|
195
|
+
"timestamp": datetime.now().isoformat(),
|
|
196
|
+
"type": "step_start",
|
|
197
|
+
"step": step,
|
|
198
|
+
"step_type": step_type
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
self._write_log_entry(log_entry)
|
|
202
|
+
logger.info(f"Starting {step_type} {step}")
|
|
203
|
+
|
|
204
|
+
def log_step_end(self, step: int, step_type: str = "agent_step",
|
|
205
|
+
duration: Optional[float] = None,
|
|
206
|
+
summary: Optional[str] = None):
|
|
207
|
+
"""Log the end of an agent step
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
step: Step number
|
|
211
|
+
step_type: Type of step
|
|
212
|
+
duration: Time taken for the step
|
|
213
|
+
summary: Summary of what happened in the step
|
|
214
|
+
"""
|
|
215
|
+
log_entry = {
|
|
216
|
+
"timestamp": datetime.now().isoformat(),
|
|
217
|
+
"type": "step_end",
|
|
218
|
+
"step": step,
|
|
219
|
+
"step_type": step_type,
|
|
220
|
+
"duration": duration,
|
|
221
|
+
"summary": summary
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
self._write_log_entry(log_entry)
|
|
225
|
+
if duration:
|
|
226
|
+
logger.info(f"Completed {step_type} {step} in {duration:.2f}s")
|
|
227
|
+
else:
|
|
228
|
+
logger.info(f"Completed {step_type} {step}")
|
|
229
|
+
|
|
230
|
+
def log_state_snapshot(self, state_data: Dict[str, Any], step: int):
|
|
231
|
+
"""Log a snapshot of the game state
|
|
232
|
+
|
|
233
|
+
Args:
|
|
234
|
+
state_data: The game state data
|
|
235
|
+
step: Current step number
|
|
236
|
+
"""
|
|
237
|
+
# Extract key information to avoid logging too much data
|
|
238
|
+
state_summary = {
|
|
239
|
+
"step": step,
|
|
240
|
+
"player_location": state_data.get("player", {}).get("location"),
|
|
241
|
+
"player_position": state_data.get("player", {}).get("position"),
|
|
242
|
+
"game_state": state_data.get("game", {}).get("game_state"),
|
|
243
|
+
"is_in_battle": state_data.get("game", {}).get("is_in_battle"),
|
|
244
|
+
"party_size": len(state_data.get("player", {}).get("party", [])),
|
|
245
|
+
"money": state_data.get("game", {}).get("money"),
|
|
246
|
+
"dialog_text": state_data.get("game", {}).get("dialog_text", "")[:100] + "..." if state_data.get("game", {}).get("dialog_text") else None
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
log_entry = {
|
|
250
|
+
"timestamp": datetime.now().isoformat(),
|
|
251
|
+
"type": "state_snapshot",
|
|
252
|
+
"step": step,
|
|
253
|
+
"state_summary": state_summary
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
self._write_log_entry(log_entry)
|
|
257
|
+
|
|
258
|
+
def log_action(self, action: str, step: int, reasoning: Optional[str] = None):
|
|
259
|
+
"""Log an action taken by the agent
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
action: The action taken
|
|
263
|
+
step: Current step number
|
|
264
|
+
reasoning: Reasoning behind the action
|
|
265
|
+
"""
|
|
266
|
+
log_entry = {
|
|
267
|
+
"timestamp": datetime.now().isoformat(),
|
|
268
|
+
"type": "action",
|
|
269
|
+
"step": step,
|
|
270
|
+
"action": action,
|
|
271
|
+
"reasoning": reasoning
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
self._write_log_entry(log_entry)
|
|
275
|
+
logger.info(f"Action {step}: {action}")
|
|
276
|
+
if reasoning:
|
|
277
|
+
logger.debug(f"Reasoning: {reasoning}")
|
|
278
|
+
|
|
279
|
+
def _write_log_entry(self, log_entry: Dict[str, Any]):
|
|
280
|
+
"""Write a log entry to the log file
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
log_entry: The log entry to write
|
|
284
|
+
"""
|
|
285
|
+
try:
|
|
286
|
+
with open(self.log_file, 'a', encoding='utf-8') as f:
|
|
287
|
+
f.write(json.dumps(log_entry, ensure_ascii=False) + '\n')
|
|
288
|
+
except Exception as e:
|
|
289
|
+
logger.error(f"Failed to write log entry: {e}")
|
|
290
|
+
|
|
291
|
+
def get_cumulative_metrics(self) -> Dict[str, Any]:
|
|
292
|
+
"""Get cumulative metrics for the session
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
Dictionary with cumulative metrics
|
|
296
|
+
"""
|
|
297
|
+
# Update runtime
|
|
298
|
+
self.cumulative_metrics["total_run_time"] = time.time() - self.cumulative_metrics["start_time"]
|
|
299
|
+
return self.cumulative_metrics.copy()
|
|
300
|
+
|
|
301
|
+
def get_session_summary(self) -> Dict[str, Any]:
    """Summarize the session by scanning the JSONL log file.

    Counts "interaction" and "error" entries and accumulates interaction
    durations; malformed lines are skipped silently.

    Returns:
        Dictionary with session summary information, or a dict with an
        "error" key if the log file could not be read.
    """
    try:
        interactions = 0
        errors = 0
        total_duration = 0.0
        with open(self.log_file, 'r', encoding='utf-8') as fh:
            for raw in fh:
                try:
                    entry = json.loads(raw.strip())
                except json.JSONDecodeError:
                    continue
                kind = entry.get("type")
                if kind == "interaction":
                    interactions += 1
                    # Only truthy (non-zero, non-None) durations are summed.
                    if entry.get("duration"):
                        total_duration += entry["duration"]
                elif kind == "error":
                    errors += 1
        average = total_duration / interactions if interactions > 0 else 0
        return {
            "session_id": self.session_id,
            "log_file": self.log_file,
            "total_interactions": interactions,
            "total_errors": errors,
            "total_duration": total_duration,
            "average_duration": average,
        }
    except Exception as e:
        logger.error(f"Failed to get session summary: {e}")
        return {"error": str(e)}
|
339
|
+
def save_checkpoint(self, checkpoint_file: Optional[str] = None, agent_step_count: Optional[int] = None):
    """Save current LLM interaction history to checkpoint file

    Re-reads the session JSONL log, wraps the entries plus metadata
    (step count, cumulative metrics) in a single JSON document, lets an
    optional map-stitcher callback add its own data, then writes the
    checkpoint atomically-enough via a single json.dump.

    Args:
        checkpoint_file: Path to save the checkpoint (defaults to cache folder)
        agent_step_count: Current agent step count for persistence
    """
    try:
        # Use cache folder by default.  The literal "checkpoint_llm.txt"
        # is also redirected into the cache dir so legacy callers that
        # pass the bare filename end up in the same place.
        if checkpoint_file is None or checkpoint_file == "checkpoint_llm.txt":
            cache_dir = ".pokeagent_cache"
            os.makedirs(cache_dir, exist_ok=True)
            checkpoint_file = os.path.join(cache_dir, "checkpoint_llm.txt")
        # Read all current log entries; unparseable lines are skipped.
        log_entries = []
        if os.path.exists(self.log_file):
            with open(self.log_file, 'r', encoding='utf-8') as f:
                for line in f:
                    try:
                        log_entries.append(json.loads(line.strip()))
                    except json.JSONDecodeError:
                        continue

        # Update run time in metrics so the checkpoint carries the
        # latest elapsed wall-clock time.
        self.cumulative_metrics["total_run_time"] = time.time() - self.cumulative_metrics["start_time"]

        # Add checkpoint metadata
        checkpoint_data = {
            "checkpoint_timestamp": datetime.now().isoformat(),
            "session_id": self.session_id,
            "original_log_file": self.log_file,
            "total_entries": len(log_entries),
            "agent_step_count": agent_step_count,  # Save current step count
            "cumulative_metrics": self.cumulative_metrics,  # Save metrics
            "log_entries": log_entries
        }

        # Add map stitcher data if available via callback.  The callback
        # mutates checkpoint_data in place before it is serialized below.
        if hasattr(self, '_map_stitcher_callback') and self._map_stitcher_callback:
            try:
                self._map_stitcher_callback(checkpoint_data)
            except Exception as e:
                # Map data is auxiliary; failure here must not block the save.
                logger.debug(f"Failed to save map stitcher to checkpoint: {e}")

        # Save to checkpoint file
        with open(checkpoint_file, 'w', encoding='utf-8') as f:
            json.dump(checkpoint_data, f, indent=2, ensure_ascii=False)

        logger.info(f"LLM checkpoint saved: {checkpoint_file} ({len(log_entries)} entries)")

    except Exception as e:
        # Checkpointing is best-effort; log and continue rather than crash.
        logger.error(f"Failed to save LLM checkpoint: {e}")
|
392
|
+
def load_checkpoint(self, checkpoint_file: Optional[str] = None) -> Optional[int]:
    """Load LLM interaction history from checkpoint file

    Restores cumulative metrics, rewrites the current session log file
    from the checkpointed entries, and hands checkpoint data to an
    optional map-stitcher load callback.

    Args:
        checkpoint_file: Path to load the checkpoint from (defaults to cache folder)

    Returns:
        Last agent step count from the checkpoint, or None if not found
    """
    try:
        # Use cache folder by default (mirrors save_checkpoint's handling
        # of the bare legacy filename).
        if checkpoint_file is None or checkpoint_file == "checkpoint_llm.txt":
            cache_dir = ".pokeagent_cache"
            checkpoint_file = os.path.join(cache_dir, "checkpoint_llm.txt")

        if not os.path.exists(checkpoint_file):
            logger.info(f"No checkpoint file found at {checkpoint_file}")
            return None
        with open(checkpoint_file, 'r', encoding='utf-8') as f:
            checkpoint_data = json.load(f)

        log_entries = checkpoint_data.get("log_entries", [])

        # Restore cumulative metrics if available
        if "cumulative_metrics" in checkpoint_data:
            saved_metrics = checkpoint_data["cumulative_metrics"]
            # Restore all metrics including the original start_time
            self.cumulative_metrics.update(saved_metrics)

            # If the checkpoint has a start_time, use it to preserve the original session start
            if "start_time" in saved_metrics:
                logger.info(f"Restored original start time from checkpoint: {saved_metrics['start_time']}")
            else:
                logger.warning("No start_time found in checkpoint, using current time")

        # Restore log entries to current log file (overwrites any
        # entries written so far this session).
        with open(self.log_file, 'w', encoding='utf-8') as f:
            for entry in log_entries:
                f.write(json.dumps(entry, ensure_ascii=False) + '\n')

        # Try to get step count from checkpoint metadata first
        last_step = checkpoint_data.get("agent_step_count")

        # If not in metadata, find the last agent step from log entries
        # (assumes "step_start" entries carry "step_number" — written by
        # another part of this module; verify against the writer).
        if last_step is None:
            for entry in reversed(log_entries):
                if entry.get("type") == "step_start" and "step_number" in entry:
                    last_step = entry["step_number"]
                    break

        logger.info(f"LLM checkpoint loaded: {checkpoint_file} ({len(log_entries)} entries, step {last_step})")

        # Load map stitcher data if available via callback
        if hasattr(self, '_map_stitcher_load_callback') and self._map_stitcher_load_callback:
            try:
                self._map_stitcher_load_callback(checkpoint_data)
            except Exception as e:
                # Map data is auxiliary; a failure here should not abort the load.
                logger.debug(f"Failed to load map stitcher from checkpoint: {e}")

        return last_step

    except Exception as e:
        # Best-effort restore: report and fall back to a fresh session.
        logger.error(f"Failed to load LLM checkpoint: {e}")
        return None
|
457
|
+
# Module-level singleton holding the shared LLMLogger instance.
_llm_logger = None

def get_llm_logger() -> LLMLogger:
    """Return the process-wide LLM logger, creating it on first use.

    Returns:
        The global LLM logger instance
    """
    global _llm_logger
    if _llm_logger is not None:
        return _llm_logger
    _llm_logger = LLMLogger()
    return _llm_logger
|
471
|
+
def setup_map_stitcher_checkpoint_integration(memory_reader):
    """Wire the memory reader's map stitcher into the checkpoint system."""
    llm_logger = get_llm_logger()

    def _on_save(checkpoint_data):
        stitcher = getattr(memory_reader, '_map_stitcher', None)
        if stitcher:
            stitcher.save_to_checkpoint(checkpoint_data)

    def _on_load(checkpoint_data):
        stitcher = getattr(memory_reader, '_map_stitcher', None)
        if stitcher:
            stitcher.load_from_checkpoint(checkpoint_data)

    llm_logger._map_stitcher_callback = _on_save
    llm_logger._map_stitcher_load_callback = _on_load
|
486
|
+
def log_llm_interaction(interaction_type: str, prompt: str, response: str,
                        metadata: Optional[Dict[str, Any]] = None,
                        duration: Optional[float] = None,
                        model_info: Optional[Dict[str, Any]] = None):
    """Convenience wrapper: log an interaction on the global LLM logger.

    Args:
        interaction_type: Type of interaction
        prompt: Input prompt
        response: LLM response
        metadata: Additional metadata
        duration: Time taken
        model_info: Model information
    """
    get_llm_logger().log_interaction(
        interaction_type, prompt, response, metadata, duration, model_info
    )
|
503
|
+
def log_llm_error(interaction_type: str, prompt: str, error: str,
                  metadata: Optional[Dict[str, Any]] = None):
    """Convenience wrapper: log an error on the global LLM logger.

    Args:
        interaction_type: Type of interaction that failed
        prompt: Input prompt
        error: Error message
        metadata: Additional metadata
    """
    get_llm_logger().log_error(interaction_type, prompt, error, metadata)