synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Helpers for trace correlation ID extraction and inclusion in task apps.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for task apps to:
|
|
4
|
+
1. Extract trace_correlation_id from rollout requests
|
|
5
|
+
2. Include trace_correlation_id in rollout responses (3 required locations)
|
|
6
|
+
|
|
7
|
+
See monorepo/trace_creation_and_judgement.txt "Fatal Guards" section for requirements.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import importlib
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any, cast
|
|
13
|
+
from urllib.parse import parse_qs, urlparse
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _mode_is_eval(mode: Any) -> bool:
    """Return True when *mode* represents an EVAL rollout.

    Accepts the string ``"eval"``, a ``RolloutMode`` enum member, or any object
    whose ``.value`` attribute equals ``"eval"``. ``RolloutMode`` is resolved
    lazily via ``importlib`` to avoid a hard import cycle with
    ``synth_ai.task.contracts`` at module import time.
    """
    rollout_mode_cls: Any | None = None
    try:
        contracts_module = importlib.import_module("synth_ai.task.contracts")
        rollout_mode_cls = getattr(contracts_module, "RolloutMode", None)
    except Exception:
        rollout_mode_cls = None

    if rollout_mode_cls is not None:
        try:
            return bool(
                mode == "eval"
                or mode == rollout_mode_cls.EVAL
                or getattr(mode, "value", None) == "eval"
            )
        except Exception:
            # Comparing exotic `mode` objects against the enum can raise;
            # fall back to the plain string comparison.
            return bool(mode == "eval")
    return bool(mode == "eval" or getattr(mode, "value", None) == "eval")


def extract_trace_correlation_id(
    policy_config: dict[str, Any],
    inference_url: str | None = None,
    mode: Any = None
) -> str | None:
    """
    Extract trace_correlation_id from policy config or inference URL.

    This is the standardized method for all task apps to extract the correlation ID
    that the RL trainer generates and passes to the task app.

    Args:
        policy_config: Policy configuration dict from RolloutRequest.policy.config
        inference_url: Inference URL (optional, used as fallback)
        mode: RolloutMode or string ("rl" or "eval"). Controls warning behavior -
            warnings only logged for RL mode, not EVAL mode.

    Returns:
        trace_correlation_id if found, None otherwise

    Extraction order:
    1. policy_config["trace_correlation_id"] (preferred)
    2. policy_config["trace"] (legacy fallback)
    3. URL query param ?cid=... (fallback)
    4. URL query param ?trace_correlation_id=... (fallback)
    5. URL query param ?trace=... (legacy fallback)
    """
    # Try policy_config first (preferred method)
    candidates: list[Any] = [
        policy_config.get("trace_correlation_id"),
        policy_config.get("trace"),
    ]

    logger.debug(
        "extract_trace_correlation_id: policy_cfg keys=%s candidates=%s",
        sorted(policy_config.keys()),
        candidates,
    )

    for candidate in candidates:
        if isinstance(candidate, str):
            stripped = candidate.strip()
            if stripped:
                logger.info(
                    "extract_trace_correlation_id: extracted from policy_config=%s",
                    stripped
                )
                return stripped

    # trace_correlation_id is optional for EVAL rollouts, so "not found" is
    # logged at debug (expected) instead of warning in that mode.
    is_eval_mode = _mode_is_eval(mode)

    # Fallback: try to extract from inference_url query params
    if not inference_url or not isinstance(inference_url, str):
        if is_eval_mode:
            logger.debug(
                "extract_trace_correlation_id: no correlation ID found in policy_config "
                "and no inference_url provided (EVAL mode - expected)"
            )
        else:
            logger.warning(
                "extract_trace_correlation_id: no correlation ID found in policy_config "
                "and no inference_url provided"
            )
        return None

    try:
        parsed = urlparse(inference_url)
        # parse_qs already returns dict[str, list[str]]; no cast needed.
        query_params = parse_qs(parsed.query or "")
        # Try multiple possible query param names
        for param_name in ("cid", "trace_correlation_id", "trace"):
            for value in query_params.get(param_name) or []:
                if isinstance(value, str) and value.strip():
                    correlation_id = value.strip()
                    logger.info(
                        "extract_trace_correlation_id: extracted from URL param %s=%s",
                        param_name,
                        correlation_id,
                    )
                    return correlation_id
    except Exception as e:
        logger.warning(
            "extract_trace_correlation_id: failed to parse inference_url=%s error=%s",
            inference_url,
            e,
        )

    if is_eval_mode:
        logger.debug(
            "extract_trace_correlation_id: no trace_correlation_id found in "
            "policy_config or inference_url=%s (EVAL mode - expected)",
            inference_url,
        )
    else:
        logger.warning(
            "extract_trace_correlation_id: no trace_correlation_id found in "
            "policy_config or inference_url=%s",
            inference_url,
        )
    return None
|
|
139
|
+
|
|
140
|
+
def validate_trace_correlation_id(
|
|
141
|
+
trace_correlation_id: str | None,
|
|
142
|
+
run_id: str,
|
|
143
|
+
policy_config: dict[str, Any],
|
|
144
|
+
fatal: bool = False
|
|
145
|
+
) -> str | None:
|
|
146
|
+
"""
|
|
147
|
+
Validate that trace_correlation_id was successfully extracted.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
trace_correlation_id: The extracted correlation ID (or None)
|
|
151
|
+
run_id: Rollout run_id for logging
|
|
152
|
+
policy_config: Policy configuration for debugging
|
|
153
|
+
fatal: If True, raise ValueError on missing ID. If False, log error only.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
trace_correlation_id if present, None if missing (when fatal=False)
|
|
157
|
+
|
|
158
|
+
Raises:
|
|
159
|
+
ValueError: If trace_correlation_id is missing and fatal=True
|
|
160
|
+
"""
|
|
161
|
+
if not trace_correlation_id:
|
|
162
|
+
error_msg = (
|
|
163
|
+
f"🚨 CRITICAL: Cannot extract trace_correlation_id!\n"
|
|
164
|
+
"\n"
|
|
165
|
+
f"Run ID: {run_id}\n"
|
|
166
|
+
f"Policy config keys: {sorted(policy_config.keys())}\n"
|
|
167
|
+
f"Inference URL: {policy_config.get('inference_url', 'NOT_SET')}\n"
|
|
168
|
+
"\n"
|
|
169
|
+
"Checked:\n"
|
|
170
|
+
f"1. policy_config['trace_correlation_id']: {policy_config.get('trace_correlation_id')}\n"
|
|
171
|
+
f"2. policy_config['trace']: {policy_config.get('trace')}\n"
|
|
172
|
+
f"3. inference_url query params\n"
|
|
173
|
+
"\n"
|
|
174
|
+
"Task app CANNOT proceed without trace_correlation_id.\n"
|
|
175
|
+
"This indicates the RL trainer is not sending it correctly.\n"
|
|
176
|
+
"\n"
|
|
177
|
+
"See monorepo/trace_creation_and_judgement.txt 'Fatal Guards' section.\n"
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
if fatal:
|
|
181
|
+
raise ValueError(error_msg)
|
|
182
|
+
else:
|
|
183
|
+
logger.error(error_msg)
|
|
184
|
+
|
|
185
|
+
return trace_correlation_id
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def include_trace_correlation_id_in_response(
    response_data: dict[str, Any],
    trace_correlation_id: str | None,
    run_id: str
) -> dict[str, Any]:
    """
    Include trace_correlation_id in all required locations of rollout response.

    Required locations (per Fatal Guards section):
    1. Top-level response["trace_correlation_id"]
    2. response["pipeline_metadata"]["trace_correlation_id"]
    3. Each trajectory["trace_correlation_id"]

    Existing values are never overwritten; the ID is only filled in where
    missing. ``response_data`` is mutated in place and also returned.

    Args:
        response_data: RolloutResponse dict (from .model_dump())
        trace_correlation_id: The correlation ID to include
        run_id: Rollout run_id for logging

    Returns:
        Modified response_data with trace_correlation_id in all required places
    """
    if not trace_correlation_id:
        logger.error(
            "include_trace_correlation_id_in_response: missing trace_correlation_id "
            "for run_id=%s - cannot include in response",
            run_id
        )
        return response_data

    # 1. Add to top-level (REQUIRED)
    if "trace_correlation_id" not in response_data:
        response_data["trace_correlation_id"] = trace_correlation_id
        logger.info(
            "include_trace_correlation_id: added to top-level run_id=%s cid=%s",
            run_id,
            trace_correlation_id
        )

    # 2. Add to pipeline_metadata (REQUIRED); replace a non-dict value so the
    # correlation ID always has somewhere to live.
    pipeline_meta = response_data.get("pipeline_metadata")
    if not isinstance(pipeline_meta, dict):
        pipeline_meta = {}
        response_data["pipeline_metadata"] = pipeline_meta

    if "trace_correlation_id" not in pipeline_meta:
        pipeline_meta["trace_correlation_id"] = trace_correlation_id
        logger.info(
            "include_trace_correlation_id: added to pipeline_metadata run_id=%s cid=%s",
            run_id,
            trace_correlation_id
        )

    # 3. Add to each trajectory (REQUIRED)
    trajectories = response_data.get("trajectories", [])
    # Guard the count too: the loop below already tolerates a non-list value,
    # but the old summary log called len() on it unconditionally and could
    # raise TypeError for an unsized value.
    traj_count = 0
    if isinstance(trajectories, list):
        traj_count = len(trajectories)
        for idx, traj in enumerate(trajectories):
            if isinstance(traj, dict) and "trace_correlation_id" not in traj:
                traj["trace_correlation_id"] = trace_correlation_id
                logger.debug(
                    "include_trace_correlation_id: added to trajectory[%d] run_id=%s cid=%s",
                    idx,
                    run_id,
                    trace_correlation_id
                )

    logger.info(
        "include_trace_correlation_id: completed run_id=%s cid=%s "
        "added to %d locations (top-level, metadata, %d trajectories)",
        run_id,
        trace_correlation_id,
        2 + traj_count,
        traj_count
    )

    return response_data
|
+
|
|
265
|
+
def verify_trace_correlation_id_in_response(
    response_data: dict[str, Any],
    expected_correlation_id: str | None,
    run_id: str
) -> bool:
    """
    Verify that trace_correlation_id is present in all required locations.

    Checks the top level of the response, pipeline_metadata, and every
    trajectory dict, collecting a description of each mismatch before logging.

    Args:
        response_data: RolloutResponse dict to verify
        expected_correlation_id: The correlation ID that should be present
        run_id: Rollout run_id for logging

    Returns:
        True if all required locations have the correlation ID, False otherwise
    """
    if not expected_correlation_id:
        logger.error(
            "verify_trace_correlation_id: no expected_correlation_id provided for run_id=%s",
            run_id
        )
        return False

    problems: list[str] = []

    # Location 1: top level of the response.
    if response_data.get("trace_correlation_id") != expected_correlation_id:
        problems.append(
            f"Top-level missing or mismatch: "
            f"expected={expected_correlation_id} actual={response_data.get('trace_correlation_id')}"
        )

    # Location 2: pipeline_metadata (must be a dict carrying the ID).
    meta = response_data.get("pipeline_metadata", {})
    meta_is_dict = isinstance(meta, dict)
    if not meta_is_dict or meta.get("trace_correlation_id") != expected_correlation_id:
        observed = meta.get("trace_correlation_id") if meta_is_dict else "NOT_A_DICT"
        problems.append(
            f"pipeline_metadata missing or mismatch: "
            f"expected={expected_correlation_id} actual={observed}"
        )

    # Location 3: every trajectory dict.
    trajectory_list = response_data.get("trajectories", [])
    if isinstance(trajectory_list, list):
        for position, entry in enumerate(trajectory_list):
            if isinstance(entry, dict) and entry.get("trace_correlation_id") != expected_correlation_id:
                problems.append(
                    f"trajectory[{position}] missing or mismatch: "
                    f"expected={expected_correlation_id} actual={entry.get('trace_correlation_id')}"
                )

    if problems:
        logger.error(
            "verify_trace_correlation_id: FAILED run_id=%s\n%s",
            run_id,
            "\n".join(problems)
        )
        return False

    logger.info(
        "verify_trace_correlation_id: PASSED run_id=%s cid=%s",
        run_id,
        expected_correlation_id
    )
    return True
|
synth_ai/task/tracing_utils.py
CHANGED
|
@@ -4,9 +4,12 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
from collections.abc import Callable
|
|
7
|
+
from datetime import datetime
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any
|
|
9
10
|
|
|
11
|
+
from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
def tracing_env_enabled(default: bool = False) -> bool:
|
|
12
15
|
"""Return True when tracing is enabled for task apps via environment variable."""
|
|
@@ -40,9 +43,17 @@ def resolve_tracing_db_url() -> str | None:
|
|
|
40
43
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
41
44
|
return f"sqlite+aiosqlite:///{path}"
|
|
42
45
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
existing = os.getenv("TASKAPP_TRACE_DB_PATH")
|
|
47
|
+
if existing:
|
|
48
|
+
path = Path(existing).expanduser()
|
|
49
|
+
else:
|
|
50
|
+
base_dir = TRACE_DB_DIR.expanduser()
|
|
51
|
+
base_dir.mkdir(parents=True, exist_ok=True)
|
|
52
|
+
path = base_dir / canonical_trace_db_name(timestamp=datetime.now())
|
|
53
|
+
os.environ["TASKAPP_TRACE_DB_PATH"] = str(path)
|
|
54
|
+
os.environ.setdefault("SQLD_DB_PATH", str(path))
|
|
55
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
56
|
+
return f"sqlite+aiosqlite:///{path}"
|
|
46
57
|
|
|
47
58
|
|
|
48
59
|
def build_tracer_factory(
|
synth_ai/task/validators.py
CHANGED
|
@@ -3,14 +3,157 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
from urllib.parse import urlparse, urlunparse
|
|
7
8
|
|
|
8
9
|
import click
|
|
9
10
|
import httpx
|
|
10
|
-
|
|
11
11
|
from synth_ai.task.contracts import TaskAppEndpoints # type: ignore[attr-defined]
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only: bool = False) -> list[str]:
|
|
15
|
+
"""Validate that a task app rollout response has required fields for RL training.
|
|
16
|
+
|
|
17
|
+
The backend RL trainer requires:
|
|
18
|
+
1. pipeline_metadata["inference_url"] at top level (with ?cid= for trace correlation)
|
|
19
|
+
2. Each step's info.meta["inference_url"] must be present (nested structure!)
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
response_data: The rollout response dict from task app
|
|
23
|
+
warn_only: If True, return warnings instead of raising exceptions
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
List of validation warnings/errors
|
|
27
|
+
|
|
28
|
+
Raises:
|
|
29
|
+
ValueError: If critical fields are missing (unless warn_only=True)
|
|
30
|
+
"""
|
|
31
|
+
issues = []
|
|
32
|
+
|
|
33
|
+
# Check pipeline_metadata
|
|
34
|
+
pipeline_metadata = response_data.get("pipeline_metadata")
|
|
35
|
+
if not isinstance(pipeline_metadata, dict):
|
|
36
|
+
issues.append("Missing or invalid 'pipeline_metadata' (required for RL training)")
|
|
37
|
+
else:
|
|
38
|
+
inference_url = pipeline_metadata.get("inference_url")
|
|
39
|
+
if not inference_url:
|
|
40
|
+
issues.append(
|
|
41
|
+
"pipeline_metadata['inference_url'] is missing. "
|
|
42
|
+
"RL trainer requires this field to extract traces."
|
|
43
|
+
)
|
|
44
|
+
elif not isinstance(inference_url, str):
|
|
45
|
+
issues.append(
|
|
46
|
+
f"pipeline_metadata['inference_url'] must be a string, got: {type(inference_url).__name__}"
|
|
47
|
+
)
|
|
48
|
+
elif "?cid=" not in inference_url:
|
|
49
|
+
issues.append(
|
|
50
|
+
f"pipeline_metadata['inference_url'] should contain '?cid=' for trace correlation. "
|
|
51
|
+
f"Got: {inference_url[:80]}..."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Check trajectories and steps
|
|
55
|
+
trajectories = response_data.get("trajectories", [])
|
|
56
|
+
if not trajectories:
|
|
57
|
+
issues.append("No trajectories found in response")
|
|
58
|
+
|
|
59
|
+
for traj_idx, trajectory in enumerate(trajectories):
|
|
60
|
+
if not isinstance(trajectory, dict):
|
|
61
|
+
continue
|
|
62
|
+
|
|
63
|
+
steps = trajectory.get("steps", [])
|
|
64
|
+
for step_idx, step in enumerate(steps):
|
|
65
|
+
if not isinstance(step, dict):
|
|
66
|
+
continue
|
|
67
|
+
|
|
68
|
+
step_info = step.get("info", {})
|
|
69
|
+
if not isinstance(step_info, dict):
|
|
70
|
+
issues.append(
|
|
71
|
+
f"trajectory[{traj_idx}].steps[{step_idx}].info is not a dict"
|
|
72
|
+
)
|
|
73
|
+
continue
|
|
74
|
+
|
|
75
|
+
# Check for nested meta.inference_url (backend expects this structure!)
|
|
76
|
+
step_meta = step_info.get("meta", {})
|
|
77
|
+
if not isinstance(step_meta, dict):
|
|
78
|
+
issues.append(
|
|
79
|
+
f"trajectory[{traj_idx}].steps[{step_idx}].info.meta is missing or not a dict. "
|
|
80
|
+
f"RL trainer expects nested structure: info.meta.inference_url"
|
|
81
|
+
)
|
|
82
|
+
continue
|
|
83
|
+
|
|
84
|
+
step_inference_url = step_meta.get("inference_url")
|
|
85
|
+
if not step_inference_url:
|
|
86
|
+
issues.append(
|
|
87
|
+
f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] is missing. "
|
|
88
|
+
f"RL trainer needs this for trace extraction (nested structure required!)"
|
|
89
|
+
)
|
|
90
|
+
elif not isinstance(step_inference_url, str):
|
|
91
|
+
issues.append(
|
|
92
|
+
f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] must be a string, "
|
|
93
|
+
f"got: {type(step_inference_url).__name__}"
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
if issues and not warn_only:
|
|
97
|
+
error_msg = "Task app response validation failed for RL training:\n" + "\n".join(
|
|
98
|
+
f" - {issue}" for issue in issues
|
|
99
|
+
)
|
|
100
|
+
raise ValueError(error_msg)
|
|
101
|
+
|
|
102
|
+
return issues
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def normalize_inference_url(url: str | None, *, default: str = "https://api.openai.com/v1/chat/completions") -> str:
|
|
106
|
+
"""Normalize an inference URL to include the /v1/chat/completions path.
|
|
107
|
+
|
|
108
|
+
This utility ensures inference URLs have the correct path structure for OpenAI-compatible
|
|
109
|
+
chat completions endpoints, while preserving query parameters (e.g., ?cid=trace_123)
|
|
110
|
+
that may be added for tracing.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
url: The inference URL to normalize (may be None or incomplete)
|
|
114
|
+
default: Default URL to use if url is None/empty
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
Normalized URL with proper path and preserved query parameters
|
|
118
|
+
|
|
119
|
+
Examples:
|
|
120
|
+
>>> normalize_inference_url("https://api.groq.com")
|
|
121
|
+
'https://api.groq.com/v1/chat/completions'
|
|
122
|
+
|
|
123
|
+
>>> normalize_inference_url("https://modal.host?cid=trace_123")
|
|
124
|
+
'https://modal.host/v1/chat/completions?cid=trace_123'
|
|
125
|
+
|
|
126
|
+
>>> normalize_inference_url("https://api.openai.com/v1")
|
|
127
|
+
'https://api.openai.com/v1/chat/completions'
|
|
128
|
+
|
|
129
|
+
>>> normalize_inference_url("https://api.groq.com/openai/v1/chat/completions")
|
|
130
|
+
'https://api.groq.com/openai/v1/chat/completions'
|
|
131
|
+
"""
|
|
132
|
+
candidate = (url or default).strip()
|
|
133
|
+
if not candidate:
|
|
134
|
+
candidate = default
|
|
135
|
+
|
|
136
|
+
# Parse the URL to separate path and query components
|
|
137
|
+
parsed = urlparse(candidate)
|
|
138
|
+
|
|
139
|
+
# Check if path already ends with a completions endpoint
|
|
140
|
+
path = parsed.path.rstrip('/')
|
|
141
|
+
if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
|
|
142
|
+
return candidate
|
|
143
|
+
|
|
144
|
+
# Determine what to append based on existing path
|
|
145
|
+
if path.endswith("/v1"):
|
|
146
|
+
new_path = f"{path}/chat/completions"
|
|
147
|
+
elif path.endswith("/chat"):
|
|
148
|
+
new_path = f"{path}/completions"
|
|
149
|
+
else:
|
|
150
|
+
# Default: append full path
|
|
151
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
152
|
+
|
|
153
|
+
# Reconstruct URL with new path and original query/fragment
|
|
154
|
+
return cast(str, urlunparse(parsed._replace(path=new_path)))
|
|
155
|
+
|
|
156
|
+
|
|
14
157
|
def validate_task_app_url(url: str | None) -> str:
|
|
15
158
|
"""Validate and normalize a task app URL.
|
|
16
159
|
|
synth_ai/tracing_v3/config.py
CHANGED
|
@@ -3,27 +3,29 @@
|
|
|
3
3
|
import os
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
|
|
6
|
+
from synth_ai.tracing_v3.constants import canonical_trace_db_path
|
|
7
|
+
|
|
8
|
+
DEFAULT_DB_FILE = str(canonical_trace_db_path())
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _default_sqlite_url() -> str:
|
|
12
|
+
base_path = os.path.abspath(os.getenv("SQLD_DB_PATH", DEFAULT_DB_FILE))
|
|
13
|
+
candidate = os.path.join(base_path, "dbs", "default", "data")
|
|
14
|
+
if os.path.isdir(base_path) and os.path.exists(candidate):
|
|
15
|
+
return f"sqlite+aiosqlite:///{candidate}"
|
|
16
|
+
return f"sqlite+aiosqlite:///{base_path}"
|
|
17
|
+
|
|
6
18
|
|
|
7
19
|
@dataclass
|
|
8
20
|
class TursoConfig:
|
|
9
21
|
"""Configuration for Turso/sqld connection."""
|
|
10
22
|
|
|
11
23
|
# Default values matching serve.sh
|
|
12
|
-
DEFAULT_DB_FILE =
|
|
24
|
+
DEFAULT_DB_FILE = DEFAULT_DB_FILE
|
|
13
25
|
DEFAULT_HTTP_PORT = 8080
|
|
14
26
|
|
|
15
|
-
# Local embedded database for async SQLAlchemy
|
|
16
|
-
# Resolve to the actual SQLite file used by sqld if the base path is a directory
|
|
17
|
-
def _resolve_sqlite_db_url() -> str: # type: ignore[no-redef]
|
|
18
|
-
base_path = os.path.abspath(os.getenv("SQLD_DB_PATH", "traces/v3/synth_ai.db"))
|
|
19
|
-
# If sqld is managing this DB, the real SQLite file lives under dbs/default/data
|
|
20
|
-
candidate = os.path.join(base_path, "dbs", "default", "data")
|
|
21
|
-
if os.path.isdir(base_path) and os.path.exists(candidate):
|
|
22
|
-
return f"sqlite+aiosqlite:///{candidate}"
|
|
23
|
-
return f"sqlite+aiosqlite:///{base_path}"
|
|
24
|
-
|
|
25
27
|
# Use env override if provided; otherwise resolve based on SQLD layout
|
|
26
|
-
db_url: str = os.getenv("TURSO_LOCAL_DB_URL",
|
|
28
|
+
db_url: str = os.getenv("TURSO_LOCAL_DB_URL", _default_sqlite_url())
|
|
27
29
|
|
|
28
30
|
# Remote database sync configuration
|
|
29
31
|
sync_url: str = os.getenv("TURSO_DATABASE_URL", "")
|
|
@@ -48,7 +50,7 @@ class TursoConfig:
|
|
|
48
50
|
|
|
49
51
|
# Daemon settings (for local sqld) - match serve.sh defaults
|
|
50
52
|
sqld_binary: str = os.getenv("SQLD_BINARY", "sqld")
|
|
51
|
-
sqld_db_path: str = os.getenv("SQLD_DB_PATH",
|
|
53
|
+
sqld_db_path: str = os.getenv("SQLD_DB_PATH", DEFAULT_DB_FILE)
|
|
52
54
|
sqld_http_port: int = int(os.getenv("SQLD_HTTP_PORT", "8080"))
|
|
53
55
|
sqld_idle_shutdown: int = int(os.getenv("SQLD_IDLE_SHUTDOWN", "0")) # 0 = no idle shutdown
|
|
54
56
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
TRACE_DB_DIR = Path("traces")
|
|
7
|
+
TRACE_DB_BASENAME = "task_app_traces"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def canonical_trace_db_name(*, timestamp: datetime | None = None) -> str:
|
|
11
|
+
"""Return the canonical trace database filename (with optional timestamp suffix)."""
|
|
12
|
+
|
|
13
|
+
if timestamp is None:
|
|
14
|
+
return f"{TRACE_DB_BASENAME}.db"
|
|
15
|
+
return f"{TRACE_DB_BASENAME}_{timestamp.strftime('%Y-%m-%d_%H-%M-%S')}.db"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def canonical_trace_db_path(*, timestamp: datetime | None = None) -> Path:
|
|
19
|
+
"""Return the canonical trace database path within the default trace directory."""
|
|
20
|
+
|
|
21
|
+
return TRACE_DB_DIR / canonical_trace_db_name(timestamp=timestamp)
|
synth_ai/tracing_v3/db_config.py
CHANGED
|
@@ -7,6 +7,8 @@ import os
|
|
|
7
7
|
import shutil
|
|
8
8
|
from typing import TYPE_CHECKING, Optional
|
|
9
9
|
|
|
10
|
+
from synth_ai.tracing_v3.constants import canonical_trace_db_path
|
|
11
|
+
|
|
10
12
|
if TYPE_CHECKING:
|
|
11
13
|
from .turso.daemon import SqldDaemon
|
|
12
14
|
|
|
@@ -17,7 +19,7 @@ class DatabaseConfig:
|
|
|
17
19
|
"""Centralized database configuration management."""
|
|
18
20
|
|
|
19
21
|
# Default values from serve.sh
|
|
20
|
-
DEFAULT_DB_FILE =
|
|
22
|
+
DEFAULT_DB_FILE = str(canonical_trace_db_path())
|
|
21
23
|
DEFAULT_HTTP_PORT = 8080
|
|
22
24
|
|
|
23
25
|
def __init__(
|
|
@@ -29,6 +29,7 @@ import contextvars
|
|
|
29
29
|
import functools
|
|
30
30
|
import time
|
|
31
31
|
from collections.abc import Awaitable, Callable, Mapping
|
|
32
|
+
from contextvars import Token
|
|
32
33
|
from typing import Any, TypeVar, cast, overload
|
|
33
34
|
|
|
34
35
|
from .abstractions import LMCAISEvent, TimeRecord
|
|
@@ -367,11 +368,11 @@ class SessionContext:
|
|
|
367
368
|
```
|
|
368
369
|
"""
|
|
369
370
|
|
|
370
|
-
def __init__(self, session_id: str, tracer=None):
|
|
371
|
+
def __init__(self, session_id: str, tracer: Any | None = None):
|
|
371
372
|
self.session_id = session_id
|
|
372
373
|
self.tracer = tracer
|
|
373
|
-
self._token = None
|
|
374
|
-
self._tracer_token = None
|
|
374
|
+
self._token: Token[str | None] | None = None
|
|
375
|
+
self._tracer_token: Token[Any] | None = None
|
|
375
376
|
|
|
376
377
|
def __enter__(self):
|
|
377
378
|
# Store tokens to restore previous context on exit
|
|
@@ -382,8 +383,9 @@ class SessionContext:
|
|
|
382
383
|
|
|
383
384
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
384
385
|
# Restore previous context - this is crucial for proper isolation
|
|
385
|
-
|
|
386
|
-
|
|
386
|
+
if self._token is not None:
|
|
387
|
+
_session_id_ctx.reset(self._token)
|
|
388
|
+
if self._tracer_token is not None:
|
|
387
389
|
_session_tracer_ctx.reset(self._tracer_token)
|
|
388
390
|
|
|
389
391
|
async def __aenter__(self):
|
|
@@ -393,6 +395,7 @@ class SessionContext:
|
|
|
393
395
|
return self
|
|
394
396
|
|
|
395
397
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
396
|
-
|
|
397
|
-
|
|
398
|
+
if self._token is not None:
|
|
399
|
+
_session_id_ctx.reset(self._token)
|
|
400
|
+
if self._tracer_token is not None:
|
|
398
401
|
_session_tracer_ctx.reset(self._tracer_token)
|
|
@@ -375,11 +375,21 @@ class SessionTracer:
|
|
|
375
375
|
|
|
376
376
|
# Save if requested
|
|
377
377
|
should_save = save if save is not None else self.auto_save
|
|
378
|
+
|
|
379
|
+
# Debug logging
|
|
380
|
+
import logging
|
|
381
|
+
_logger = logging.getLogger(__name__)
|
|
382
|
+
_logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
|
|
383
|
+
|
|
378
384
|
if should_save and self.db:
|
|
385
|
+
_logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
|
|
379
386
|
await self.db.insert_session_trace(self._current_trace)
|
|
387
|
+
_logger.info("[TRACE_DEBUG] insert_session_trace completed")
|
|
380
388
|
|
|
381
389
|
# Trigger post-save hooks
|
|
382
390
|
await self.hooks.trigger("after_save", session=self._current_trace)
|
|
391
|
+
else:
|
|
392
|
+
_logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
|
|
383
393
|
|
|
384
394
|
# Trigger session end hooks
|
|
385
395
|
await self.hooks.trigger("session_end", session=self._current_trace)
|