synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +56 -26
- examples/swe/task_app/hosted/rollout.py +42 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +799 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/models/supported.py +1 -0
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +48 -59
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +1922 -190
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +9 -9
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +24 -5
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +138 -39
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +56 -0
- synth_ai/task/rubrics/loaders.py +152 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +413 -6
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +66 -43
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
synth_ai/task/validators.py
CHANGED
|
@@ -1,11 +1,418 @@
|
|
|
1
|
+
"""Task app validation utilities."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.parse import urlparse, urlunparse
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from synth_ai.task.contracts import TaskAppEndpoints # type: ignore[attr-defined]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only: bool = False) -> list[str]:
    """Validate that a task app rollout response has required fields for RL training.

    The backend RL trainer requires:
    1. pipeline_metadata["inference_url"] at top level (with ?cid= for trace correlation)
    2. Each step's info.meta["inference_url"] must be present (nested structure!)

    Malformed containers (``trajectories`` or ``steps`` that are ``None`` or not
    a list) are reported as validation issues rather than surfacing as a
    ``TypeError`` from iteration, so ``warn_only`` is honoured for every kind
    of bad payload.

    Args:
        response_data: The rollout response dict from task app
        warn_only: If True, return warnings instead of raising exceptions

    Returns:
        List of validation warnings/errors (empty when the response is valid)

    Raises:
        ValueError: If critical fields are missing (unless warn_only=True)
    """
    issues: list[str] = []

    # Check pipeline_metadata
    pipeline_metadata = response_data.get("pipeline_metadata")
    if not isinstance(pipeline_metadata, dict):
        issues.append("Missing or invalid 'pipeline_metadata' (required for RL training)")
    else:
        inference_url = pipeline_metadata.get("inference_url")
        if not inference_url:
            issues.append(
                "pipeline_metadata['inference_url'] is missing. "
                "RL trainer requires this field to extract traces."
            )
        elif not isinstance(inference_url, str):
            issues.append(
                f"pipeline_metadata['inference_url'] must be a string, got: {type(inference_url).__name__}"
            )
        elif "?cid=" not in inference_url:
            issues.append(
                f"pipeline_metadata['inference_url'] should contain '?cid=' for trace correlation. "
                f"Got: {inference_url[:80]}..."
            )

    # Check trajectories and steps
    trajectories = response_data.get("trajectories")
    if not trajectories:
        # Covers a missing key, an explicit null and an empty list alike.
        issues.append("No trajectories found in response")
        trajectories = []
    elif not isinstance(trajectories, (list, tuple)):
        # A truthy non-sequence (e.g. an int) cannot be iterated; report it
        # instead of letting enumerate() raise TypeError.
        issues.append(f"'trajectories' must be a list, got: {type(trajectories).__name__}")
        trajectories = []

    for traj_idx, trajectory in enumerate(trajectories):
        if not isinstance(trajectory, dict):
            continue

        # An explicit null is treated the same as a missing "steps" key.
        steps = trajectory.get("steps") or []
        if not isinstance(steps, (list, tuple)):
            issues.append(
                f"trajectory[{traj_idx}].steps must be a list, got: {type(steps).__name__}"
            )
            continue

        for step_idx, step in enumerate(steps):
            if not isinstance(step, dict):
                continue

            step_info = step.get("info", {})
            if not isinstance(step_info, dict):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info is not a dict"
                )
                continue

            # Check for nested meta.inference_url (backend expects this structure!)
            step_meta = step_info.get("meta", {})
            if not isinstance(step_meta, dict):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta is missing or not a dict. "
                    f"RL trainer expects nested structure: info.meta.inference_url"
                )
                continue

            step_inference_url = step_meta.get("inference_url")
            if not step_inference_url:
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] is missing. "
                    f"RL trainer needs this for trace extraction (nested structure required!)"
                )
            elif not isinstance(step_inference_url, str):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] must be a string, "
                    f"got: {type(step_inference_url).__name__}"
                )

    if issues and not warn_only:
        error_msg = "Task app response validation failed for RL training:\n" + "\n".join(
            f" - {issue}" for issue in issues
        )
        raise ValueError(error_msg)

    return issues
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def normalize_inference_url(url: str | None, *, default: str = "https://api.openai.com/v1/chat/completions") -> str:
    """Normalize an inference URL to include the /v1/chat/completions path.

    This utility ensures inference URLs have the correct path structure for OpenAI-compatible
    chat completions endpoints, while preserving query parameters (e.g., ?cid=trace_123)
    that may be added for tracing. Trailing slashes on the path are dropped in
    every case, including when the path already names a completions endpoint.

    Args:
        url: The inference URL to normalize (may be None or incomplete)
        default: Default URL to use if url is None/empty

    Returns:
        Normalized URL with proper path and preserved query parameters

    Examples:
        >>> normalize_inference_url("https://api.groq.com")
        'https://api.groq.com/v1/chat/completions'

        >>> normalize_inference_url("https://modal.host?cid=trace_123")
        'https://modal.host/v1/chat/completions?cid=trace_123'

        >>> normalize_inference_url("https://api.openai.com/v1")
        'https://api.openai.com/v1/chat/completions'

        >>> normalize_inference_url("https://api.groq.com/openai/v1/chat/completions")
        'https://api.groq.com/openai/v1/chat/completions'

        >>> normalize_inference_url("https://api.openai.com/v1/chat/completions/")
        'https://api.openai.com/v1/chat/completions'
    """
    candidate = (url or default).strip()
    if not candidate:
        # url was whitespace-only; fall back to the default as given.
        candidate = default

    # Parse the URL to separate path and query components
    parsed = urlparse(candidate)
    path = parsed.path.rstrip("/")

    # Path already names a completions endpoint ("/v1/chat/completions" is
    # covered by the same suffix check).
    if path.endswith("/chat/completions"):
        if path == parsed.path:
            # Nothing to change: hand back the input untouched so it
            # round-trips byte-for-byte.
            return candidate
        # Only a trailing slash differed; strip it for consistency with the
        # other branches below.
        return urlunparse(parsed._replace(path=path))

    # Determine what to append based on existing path
    if path.endswith("/v1"):
        new_path = f"{path}/chat/completions"
    elif path.endswith("/chat"):
        new_path = f"{path}/completions"
    else:
        # Default: append full path
        new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"

    # Reconstruct URL with new path and original query/fragment
    return urlunparse(parsed._replace(path=new_path))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def validate_task_app_url(url: str | None) -> str:
    """Validate and normalize a task app URL.

    Surrounding whitespace and trailing slashes are stripped before the URL is
    checked. Accepted hosts are a dotted domain name, ``localhost``, or a
    dotted-quad IPv4 address, optionally followed by a port and a path/query.

    Args:
        url: URL to validate

    Returns:
        Normalized URL

    Raises:
        ValueError: If URL is invalid
    """
    if not url:
        raise ValueError("Task app URL is required")

    url = url.strip().rstrip("/")

    # Basic URL validation
    url_pattern = re.compile(
        r"^https?://"  # http:// or https://
        # Domain. The final label (TLD) may be up to 63 letters: the DNS label
        # limit. A cap of 6 would reject real TLDs such as ".technology".
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"  # domain...
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",
        re.IGNORECASE,
    )

    if not url_pattern.match(url):
        raise ValueError(f"Invalid task app URL: {url}")

    return url
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _print_success(msg: str) -> None:
    """Echo *msg* in green, prefixed with a check mark."""
    styled = click.style(f"✓ {msg}", fg="green")
    click.echo(styled)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _print_error(msg: str) -> None:
    """Echo *msg* in red, prefixed with a cross, via click's ``err=True`` stream."""
    styled = click.style(f"✗ {msg}", fg="red")
    click.echo(styled, err=True)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _print_warning(msg: str) -> None:
    """Echo *msg* in yellow, prefixed with a warning sign."""
    styled = click.style(f"⚠ {msg}", fg="yellow")
    click.echo(styled)
|
|
205
|
+
|
|
4
206
|
|
|
207
|
+
def _print_info(msg: str) -> None:
    """Echo *msg* unstyled, offset by leading whitespace so it reads as a detail line."""
    line = f" {msg}"
    click.echo(line)
|
|
5
210
|
|
|
6
|
-
def validate_task_app_url(url: str, *, name: str = "TASK_APP_BASE_URL") -> None:
|
|
7
|
-
"""Validate a Task App base URL (scheme + host present)."""
|
|
8
211
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
212
|
+
async def validate_task_app_endpoint(
    url: str,
    api_key: str | None = None,
    min_instances: int = 10,
    verbose: bool = False,
) -> tuple[bool, dict[str, Any]]:
    """Validate a task app deployment by probing its HTTP endpoints.

    Five checks run in order: root, health, info, task_info (plus per-seed
    instance fetches) and rollout (probed with ``OPTIONS`` only, no rollout is
    executed). Progress is echoed with click as each check runs, and a failed
    check never prevents the later ones from running.

    Args:
        url: Base URL of the task app. Endpoint paths taken from
            ``TaskAppEndpoints`` are appended to it verbatim.
        api_key: Optional key sent as the ``X-API-Key`` header on every
            request except the root probe.
        min_instances: Minimum number of seeds (starting at 0) that must each
            return HTTP 200 from the task_info endpoint.
        verbose: When true, extra detail lines are echoed. Verbosity never
            changes the pass/fail outcome.

    Returns:
        ``(success, results)`` where ``success`` is true only if no check
        failed, and ``results`` holds the keys ``url``, ``endpoints``,
        ``auth``, ``task_instances`` and ``overall``.
    """
    results: dict[str, Any] = {
        "url": url,
        "endpoints": {},
        "auth": {},
        "task_instances": {},
        "overall": False,
    }

    all_passed = True
    endpoints = TaskAppEndpoints()

    # Only attach the auth header when a key was actually supplied.
    headers: dict[str, str] = {}
    if api_key:
        headers["X-API-Key"] = api_key

    click.echo(f"\n{'='*60}")
    click.echo(f"Validating Task App: {url}")
    click.echo(f"{'='*60}\n")

    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        # 1. Check root endpoint (sent without the auth header).
        click.echo("1. Checking root endpoint...")
        try:
            resp = await client.get(f"{url}{endpoints.root}")
            if resp.status_code == 200:
                data = resp.json()
                _print_success(f"Root endpoint responds (status: {data.get('status')})")
                results["endpoints"]["root"] = {"passed": True, "data": data}
                if verbose:
                    _print_info(f"Service: {data.get('service', 'N/A')}")
            else:
                _print_error(f"Root endpoint returned {resp.status_code}")
                results["endpoints"]["root"] = {"passed": False, "status": resp.status_code}
                all_passed = False
        except Exception as e:
            # Best-effort validator: record the failure and keep checking.
            _print_error(f"Root endpoint failed: {e}")
            results["endpoints"]["root"] = {"passed": False, "error": str(e)}
            all_passed = False

        # 2. Check health endpoint
        click.echo("\n2. Checking health endpoint...")
        try:
            resp = await client.get(f"{url}{endpoints.health}", headers=headers)
            if resp.status_code == 200:
                data = resp.json()
                _print_success(f"Health endpoint responds (healthy: {data.get('healthy')})")
                results["endpoints"]["health"] = {"passed": True, "data": data}

                # "auth" is optional metadata; a null or non-dict value must
                # not turn a healthy response into a failed check.
                auth_info = _dict_or_empty(data.get("auth"))
                if auth_info.get("required"):
                    _print_info(f"Auth required: {auth_info.get('required')}")
                    _print_info(f"Expected key prefix: {auth_info.get('expected_prefix', 'N/A')}")

                    results["auth"]["required"] = True
                    if api_key:
                        _print_success("API key provided and accepted")
                        results["auth"]["provided"] = True
                        results["auth"]["accepted"] = True
                    else:
                        _print_warning("No API key provided but may be required")
                        results["auth"]["provided"] = False
            else:
                _print_error(f"Health endpoint returned {resp.status_code}")
                results["endpoints"]["health"] = {"passed": False, "status": resp.status_code}
                all_passed = False

                # 401 (missing/invalid credentials) and 403 (rejected
                # credentials) both mean the key is the problem.
                if resp.status_code in (401, 403):
                    _print_error("Authentication failed - provide API key with --api-key")
                    results["auth"]["error"] = "Authentication failed"

        except Exception as e:
            _print_error(f"Health endpoint failed: {e}")
            results["endpoints"]["health"] = {"passed": False, "error": str(e)}
            all_passed = False

        # 3. Check info endpoint
        click.echo("\n3. Checking info endpoint...")
        try:
            resp = await client.get(f"{url}{endpoints.info}", headers=headers)
            if resp.status_code == 200:
                data = resp.json()
                _print_success("Info endpoint responds")
                results["endpoints"]["info"] = {"passed": True, "data": data}

                if verbose:
                    # Detail output only: tolerate unexpected shapes so that
                    # --verbose cannot change the verdict recorded above.
                    info = _dict_or_empty(data)
                    service = _dict_or_empty(info.get("service"))
                    task_info = service.get("task", {})
                    if isinstance(task_info, dict):
                        _print_info(f"Task: {task_info.get('name', 'N/A')}")
                    _print_info(f"Version: {service.get('version', 'N/A')}")

                    dataset = info.get("dataset", {})
                    if isinstance(dataset, dict):
                        _print_info(f"Dataset: {dataset.get('id', 'N/A')}")
            else:
                _print_error(f"Info endpoint returned {resp.status_code}")
                results["endpoints"]["info"] = {"passed": False, "status": resp.status_code}
                all_passed = False
        except Exception as e:
            _print_error(f"Info endpoint failed: {e}")
            results["endpoints"]["info"] = {"passed": False, "error": str(e)}
            all_passed = False

        # 4. Check task_info endpoint and instance count
        click.echo("\n4. Checking task_info endpoint and instance availability...")
        try:
            # Get taskset descriptor first
            resp = await client.get(f"{url}{endpoints.task_info}", headers=headers)
            if resp.status_code == 200:
                data = resp.json()
                _print_success("Task info endpoint responds")
                results["endpoints"]["task_info"] = {"passed": True}

                taskset = data.get("taskset", {})
                if verbose and taskset:
                    if isinstance(taskset, dict):
                        _print_info(f"Taskset: {taskset.get('id', 'N/A')}")
                    else:
                        _print_info(f"Taskset: {taskset}")

                # Fetch instances one seed at a time, for seeds
                # 0 .. min_instances + 4 (a few more than strictly required),
                # stopping at the first seed that cannot be fetched.
                instances = []
                for seed in range(min_instances + 5):
                    try:
                        resp_seed = await client.get(
                            f"{url}{endpoints.task_info}",
                            params={"seed": seed},
                            headers=headers,
                        )
                        if resp_seed.status_code == 200:
                            instances.append(resp_seed.json())
                        else:
                            break  # Stop if we hit an invalid seed
                    except Exception:
                        # Deliberately lenient: a transport or decode error on
                        # one seed just ends the count; the shortfall (if any)
                        # is reported below.
                        break

                instance_count = len(instances)
                results["task_instances"]["count"] = instance_count
                results["task_instances"]["requested"] = min_instances

                if instance_count >= min_instances:
                    _print_success(f"Found {instance_count} task instances (≥ {min_instances} required)")
                    results["task_instances"]["passed"] = True

                    if verbose and instances:
                        # Detail output only; a non-dict instance must not
                        # undo the pass recorded above.
                        sample = _dict_or_empty(instances[0])
                        task_info_sample = sample.get('task', {})
                        if isinstance(task_info_sample, dict):
                            _print_info(f"Sample task: {task_info_sample.get('name', 'N/A')}")
                        _print_info(f"Environment: {sample.get('environment', 'N/A')}")
                else:
                    _print_error(f"Only {instance_count} task instances available (need ≥ {min_instances})")
                    results["task_instances"]["passed"] = False
                    all_passed = False
            else:
                _print_error(f"Task info endpoint returned {resp.status_code}")
                results["endpoints"]["task_info"] = {"passed": False, "status": resp.status_code}
                # Mirror the exception path so "passed" is always present.
                results["task_instances"]["passed"] = False
                all_passed = False
        except Exception as e:
            _print_error(f"Task info endpoint failed: {e}")
            results["endpoints"]["task_info"] = {"passed": False, "error": str(e)}
            results["task_instances"]["passed"] = False
            all_passed = False

        # 5. Check rollout endpoint structure (don't actually run a rollout)
        click.echo("\n5. Checking rollout endpoint availability...")
        try:
            # Just check if it's registered (OPTIONS or a lightweight probe)
            resp = await client.options(f"{url}{endpoints.rollout}", headers=headers)
            # Many servers return 200 for OPTIONS, some return 405
            if resp.status_code in (200, 204, 405):
                _print_success("Rollout endpoint is registered")
                results["endpoints"]["rollout"] = {"passed": True}
            elif resp.status_code == 404:
                # 404 is the one status that positively says the route is
                # missing, so it cannot be reported as "endpoint exists".
                _print_error(f"Rollout endpoint returned {resp.status_code}")
                results["endpoints"]["rollout"] = {"passed": False, "status": resp.status_code}
                all_passed = False
            else:
                _print_warning(f"Rollout endpoint returned unexpected status: {resp.status_code}")
                results["endpoints"]["rollout"] = {"passed": True, "note": "endpoint exists"}
        except Exception as e:
            # OPTIONS might not be supported, that's okay
            _print_info(f"Rollout endpoint check skipped (OPTIONS not supported): {e}")
            results["endpoints"]["rollout"] = {"passed": True, "note": "assumed present"}

    # Summary
    click.echo(f"\n{'='*60}")
    if all_passed:
        _print_success("All validations passed!")
    else:
        _print_error("Some validations failed. See errors above.")
    click.echo(f"{'='*60}\n")

    results["overall"] = all_passed
    return all_passed, results


def _dict_or_empty(value: Any) -> dict[str, Any]:
    """Return ``value`` unchanged if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}
|
|
@@ -37,7 +37,7 @@ Concepts:
|
|
|
37
37
|
from __future__ import annotations
|
|
38
38
|
|
|
39
39
|
from dataclasses import asdict, dataclass, field
|
|
40
|
-
from datetime import
|
|
40
|
+
from datetime import datetime, timezone
|
|
41
41
|
from typing import Any
|
|
42
42
|
|
|
43
43
|
from .lm_call_record_abstractions import LLMCallRecord
|
|
@@ -249,7 +249,7 @@ class SessionTimeStep:
|
|
|
249
249
|
|
|
250
250
|
step_id: str = ""
|
|
251
251
|
step_index: int = 0
|
|
252
|
-
timestamp: datetime = field(default_factory=lambda: datetime.now(
|
|
252
|
+
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
|
253
253
|
turn_number: int | None = None
|
|
254
254
|
events: list[BaseEvent] = field(default_factory=list)
|
|
255
255
|
markov_blanket_messages: list[SessionEventMarkovBlanketMessage] = field(default_factory=list)
|
|
@@ -283,7 +283,7 @@ class SessionTrace:
|
|
|
283
283
|
"""
|
|
284
284
|
|
|
285
285
|
session_id: str = ""
|
|
286
|
-
created_at: datetime = field(default_factory=lambda: datetime.now(
|
|
286
|
+
created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
|
287
287
|
session_time_steps: list[SessionTimeStep] = field(default_factory=list)
|
|
288
288
|
event_history: list[BaseEvent] = field(default_factory=list)
|
|
289
289
|
markov_blanket_message_history: list[SessionEventMarkovBlanketMessage] = field(
|
|
@@ -37,10 +37,14 @@ from .utils import calculate_cost, detect_provider
|
|
|
37
37
|
# Context variables for session and turn tracking
|
|
38
38
|
# These variables automatically propagate across async call boundaries,
|
|
39
39
|
# allowing deeply nested code to access tracing context without explicit passing
|
|
40
|
-
_session_id_ctx: contextvars.ContextVar[str | None] = contextvars.ContextVar(
|
|
41
|
-
|
|
40
|
+
_session_id_ctx: contextvars.ContextVar[str | None] = contextvars.ContextVar(
|
|
41
|
+
"session_id"
|
|
42
|
+
)
|
|
43
|
+
_turn_number_ctx: contextvars.ContextVar[int | None] = contextvars.ContextVar(
|
|
44
|
+
"turn_number"
|
|
45
|
+
)
|
|
42
46
|
_session_tracer_ctx: contextvars.ContextVar[Any | None] = contextvars.ContextVar(
|
|
43
|
-
"session_tracer"
|
|
47
|
+
"session_tracer"
|
|
44
48
|
)
|
|
45
49
|
|
|
46
50
|
|
|
@@ -8,7 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import uuid
|
|
10
10
|
from dataclasses import dataclass, field
|
|
11
|
-
from datetime import
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
12
|
from typing import Any, TypedDict, cast
|
|
13
13
|
|
|
14
14
|
from .lm_call_record_abstractions import (
|
|
@@ -180,8 +180,8 @@ def create_llm_call_record_from_response(
|
|
|
180
180
|
api_type=api_type,
|
|
181
181
|
provider=provider,
|
|
182
182
|
model_name=model_name,
|
|
183
|
-
started_at=started_at or datetime.now(
|
|
184
|
-
completed_at=completed_at or datetime.now(
|
|
183
|
+
started_at=started_at or datetime.now(timezone.utc),
|
|
184
|
+
completed_at=completed_at or datetime.now(timezone.utc),
|
|
185
185
|
latency_ms=latency_ms,
|
|
186
186
|
request_params=params,
|
|
187
187
|
input_messages=input_messages,
|
|
@@ -376,8 +376,8 @@ def create_llm_call_record_from_streaming(
|
|
|
376
376
|
api_type="responses", # Streaming typically from Responses API
|
|
377
377
|
provider=provider,
|
|
378
378
|
model_name=model_name,
|
|
379
|
-
started_at=started_at or datetime.now(
|
|
380
|
-
completed_at=completed_at or datetime.now(
|
|
379
|
+
started_at=started_at or datetime.now(timezone.utc),
|
|
380
|
+
completed_at=completed_at or datetime.now(timezone.utc),
|
|
381
381
|
latency_ms=latency_ms,
|
|
382
382
|
request_params=params,
|
|
383
383
|
input_messages=input_messages,
|
|
@@ -25,15 +25,15 @@ application to continue without blocking on sync operations.
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
import asyncio
|
|
28
|
+
import importlib
|
|
28
29
|
import logging
|
|
29
|
-
from typing import Any
|
|
30
|
-
|
|
31
|
-
import libsql
|
|
30
|
+
from typing import Any, cast
|
|
32
31
|
|
|
33
32
|
from .config import CONFIG
|
|
34
33
|
|
|
35
34
|
logger = logging.getLogger(__name__)
|
|
36
35
|
|
|
36
|
+
libsql = cast(Any, importlib.import_module("libsql"))
|
|
37
37
|
|
|
38
38
|
class ReplicaSync:
|
|
39
39
|
"""Manages synchronization of embedded SQLite replica with remote Turso database.
|
|
@@ -53,7 +53,7 @@ class ReplicaSync:
|
|
|
53
53
|
db_path: str = "embedded.db",
|
|
54
54
|
sync_url: str | None = None,
|
|
55
55
|
auth_token: str | None = None,
|
|
56
|
-
sync_interval:
|
|
56
|
+
sync_interval: float | None = None,
|
|
57
57
|
):
|
|
58
58
|
"""Initialize replica sync manager.
|
|
59
59
|
|
|
@@ -55,11 +55,11 @@ def normalize_for_json(value: Any) -> Any:
|
|
|
55
55
|
return {str(k): normalize_for_json(v) for k, v in value.items()}
|
|
56
56
|
|
|
57
57
|
# Sequences
|
|
58
|
-
if isinstance(value,
|
|
58
|
+
if isinstance(value, list | tuple | set):
|
|
59
59
|
return [normalize_for_json(v) for v in value]
|
|
60
60
|
|
|
61
61
|
# Datetime / Date
|
|
62
|
-
if isinstance(value,
|
|
62
|
+
if isinstance(value, datetime | date):
|
|
63
63
|
return value.isoformat()
|
|
64
64
|
|
|
65
65
|
# Decimal
|
|
@@ -73,7 +73,7 @@ def normalize_for_json(value: Any) -> Any:
|
|
|
73
73
|
return str(value)
|
|
74
74
|
|
|
75
75
|
# Bytes-like
|
|
76
|
-
if isinstance(value,
|
|
76
|
+
if isinstance(value, bytes | bytearray):
|
|
77
77
|
return base64.b64encode(bytes(value)).decode("ascii")
|
|
78
78
|
|
|
79
79
|
# Enum
|
|
@@ -82,9 +82,9 @@ def normalize_for_json(value: Any) -> Any:
|
|
|
82
82
|
|
|
83
83
|
# Numpy scalars / arrays
|
|
84
84
|
if _np is not None:
|
|
85
|
-
if isinstance(value,
|
|
85
|
+
if isinstance(value, _np.generic): # type: ignore[attr-defined]
|
|
86
86
|
return normalize_for_json(value.item())
|
|
87
|
-
if isinstance(value,
|
|
87
|
+
if isinstance(value, _np.ndarray):
|
|
88
88
|
return normalize_for_json(value.tolist())
|
|
89
89
|
|
|
90
90
|
# Floats: sanitize NaN / Infinity to None
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import json
|
|
7
7
|
from contextlib import asynccontextmanager
|
|
8
|
-
from datetime import
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
9
|
from typing import Any
|
|
10
10
|
|
|
11
11
|
from .abstractions import (
|
|
@@ -106,7 +106,7 @@ class SessionTracer:
|
|
|
106
106
|
|
|
107
107
|
self._current_trace = SessionTrace(
|
|
108
108
|
session_id=session_id,
|
|
109
|
-
created_at=datetime.now(
|
|
109
|
+
created_at=datetime.now(timezone.utc),
|
|
110
110
|
session_time_steps=[],
|
|
111
111
|
event_history=[],
|
|
112
112
|
markov_blanket_message_history=[],
|
|
@@ -152,7 +152,7 @@ class SessionTracer:
|
|
|
152
152
|
step = SessionTimeStep(
|
|
153
153
|
step_id=step_id,
|
|
154
154
|
step_index=len(self._current_trace.session_time_steps),
|
|
155
|
-
timestamp=datetime.now(
|
|
155
|
+
timestamp=datetime.now(timezone.utc),
|
|
156
156
|
turn_number=turn_number,
|
|
157
157
|
step_metadata=metadata or {},
|
|
158
158
|
)
|
|
@@ -197,7 +197,7 @@ class SessionTracer:
|
|
|
197
197
|
step = self._current_step
|
|
198
198
|
|
|
199
199
|
if step and step.completed_at is None:
|
|
200
|
-
step.completed_at = datetime.now(
|
|
200
|
+
step.completed_at = datetime.now(timezone.utc)
|
|
201
201
|
|
|
202
202
|
# Trigger hooks
|
|
203
203
|
await self.hooks.trigger(
|
|
@@ -294,7 +294,7 @@ class SessionTracer:
|
|
|
294
294
|
content=normalised_content,
|
|
295
295
|
message_type=message_type,
|
|
296
296
|
time_record=TimeRecord(
|
|
297
|
-
event_time=event_time or datetime.now(
|
|
297
|
+
event_time=event_time or datetime.now(timezone.utc).timestamp(), message_time=message_time
|
|
298
298
|
),
|
|
299
299
|
metadata=metadata or {},
|
|
300
300
|
)
|
|
@@ -368,18 +368,28 @@ class SessionTracer:
|
|
|
368
368
|
# End any open timesteps
|
|
369
369
|
for step in self._current_trace.session_time_steps:
|
|
370
370
|
if step.completed_at is None:
|
|
371
|
-
step.completed_at = datetime.now(
|
|
371
|
+
step.completed_at = datetime.now(timezone.utc)
|
|
372
372
|
|
|
373
373
|
# Trigger pre-save hooks
|
|
374
374
|
await self.hooks.trigger("before_save", session=self._current_trace)
|
|
375
375
|
|
|
376
376
|
# Save if requested
|
|
377
377
|
should_save = save if save is not None else self.auto_save
|
|
378
|
+
|
|
379
|
+
# Debug logging
|
|
380
|
+
import logging
|
|
381
|
+
_logger = logging.getLogger(__name__)
|
|
382
|
+
_logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
|
|
383
|
+
|
|
378
384
|
if should_save and self.db:
|
|
385
|
+
_logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
|
|
379
386
|
await self.db.insert_session_trace(self._current_trace)
|
|
387
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace completed")
|
|
380
388
|
|
|
381
389
|
# Trigger post-save hooks
|
|
382
390
|
await self.hooks.trigger("after_save", session=self._current_trace)
|
|
391
|
+
else:
|
|
392
|
+
_logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
|
|
383
393
|
|
|
384
394
|
# Trigger session end hooks
|
|
385
395
|
await self.hooks.trigger("session_end", session=self._current_trace)
|