traceguard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- traceguard/__init__.py +11 -0
- traceguard/registry/__init__.py +11 -0
- traceguard/registry/models.py +145 -0
- traceguard/registry/prompts.py +148 -0
- traceguard/sdk/__init__.py +4 -0
- traceguard/sdk/normalizer.py +101 -0
- traceguard/sdk/tracer.py +269 -0
- traceguard/sdk/wrappers/__init__.py +1 -0
- traceguard/sdk/wrappers/anthropic.py +128 -0
- traceguard/store/__init__.py +4 -0
- traceguard/store/models.py +143 -0
- traceguard/validators/__init__.py +14 -0
- traceguard/validators/lookahead.py +130 -0
- traceguard-0.2.0.dist-info/METADATA +112 -0
- traceguard-0.2.0.dist-info/RECORD +17 -0
- traceguard-0.2.0.dist-info/WHEEL +4 -0
- traceguard-0.2.0.dist-info/licenses/LICENSE +202 -0
traceguard/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Model + prompt registries (SPEC §4.2, §4.3)."""
|
|
2
|
+
from traceguard.registry.models import NoEligibleModelError, register_model, select_model
|
|
3
|
+
from traceguard.registry.prompts import PromptTemplate, load_prompt
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"NoEligibleModelError",
|
|
7
|
+
"PromptTemplate",
|
|
8
|
+
"load_prompt",
|
|
9
|
+
"register_model",
|
|
10
|
+
"select_model",
|
|
11
|
+
]
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Model registry queries (SPEC §4.2)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Literal, overload
|
|
6
|
+
|
|
7
|
+
from sqlalchemy import or_, select
|
|
8
|
+
from sqlalchemy.engine import Engine
|
|
9
|
+
from sqlalchemy.orm import Session
|
|
10
|
+
|
|
11
|
+
from traceguard.store.models import ModelRegistryEntry, make_engine
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NoEligibleModelError(LookupError):
    """No model satisfies the look-ahead constraints (SPEC §4.2 strict mode).

    Raised by ``select_model`` when its registry query returns no entry for
    the requested capability class. Subclasses ``LookupError`` so callers can
    catch either the specific or the generic exception type.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def register_model(
    model_id: str,
    *,
    model_family: str,
    capability_class: str,
    released_at: datetime,
    available_to_us_at: datetime,
    deprecated_at: datetime | None = None,
    engine: Engine | None = None,
) -> None:
    """Add a brand-new row to the model registry.

    Per SPEC §3.2 the registry is insert-only and keyed by ``model_id``: an id
    that is already present is never overwritten. To "upgrade" a model,
    register it under a new ``model_id``.

    Args:
        model_id: Stable primary key of the entry.
        model_family: Stored as-is on the entry.
        capability_class: Stored as-is on the entry.
        released_at: Release instant; must not be later than
            ``available_to_us_at``.
        available_to_us_at: Instant from which the model was usable by us.
        deprecated_at: Optional deprecation instant.
        engine: Engine to write through; ``make_engine()`` is used when omitted.

    Raises:
        ValueError: If ``released_at`` is later than ``available_to_us_at``, or
            if ``model_id`` is already registered.
    """
    # Validate before touching the database so bad input costs no connection.
    if available_to_us_at < released_at:
        message = (
            "released_at MUST be <= available_to_us_at "
            f"(got released_at={released_at!r}, available_to_us_at={available_to_us_at!r})"
        )
        raise ValueError(message)

    target_engine = make_engine() if engine is None else engine
    with Session(target_engine) as session:
        # Insert-only contract: an existing id is an error, not an update.
        if session.get(ModelRegistryEntry, model_id) is not None:
            raise ValueError(
                f"model_id {model_id!r} already registered; "
                "register a new model_id instead of modifying existing entries"
            )
        new_entry = ModelRegistryEntry(
            model_id=model_id,
            model_family=model_family,
            capability_class=capability_class,
            released_at=released_at,
            available_to_us_at=available_to_us_at,
            deprecated_at=deprecated_at,
        )
        session.add(new_entry)
        session.commit()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _not_deprecated_as_of(moment: datetime):
    """Build the SQL predicate "entry has no deprecation, or it is after ``moment``"."""
    return or_(
        ModelRegistryEntry.deprecated_at.is_(None),
        ModelRegistryEntry.deprecated_at > moment,
    )


@overload
def select_model(
    capability_class: str,
    *,
    available_at: datetime,
    strict: Literal[True],
    engine: Engine | None = None,
) -> str: ...


@overload
def select_model(
    capability_class: str,
    *,
    available_at: datetime,
    strict: Literal[False],
    engine: Engine | None = None,
) -> tuple[str, bool]: ...


def select_model(
    capability_class: str,
    *,
    available_at: datetime,
    strict: bool,
    engine: Engine | None = None,
) -> str | tuple[str, bool]:
    """Pick a model_id for ``capability_class`` at the ``available_at`` instant.

    ``strict`` is keyword-only and has no default (SPEC §4.2). Callers MUST
    explicitly state which mode they want — this is the friction that
    prevents accidental look-ahead.

    strict=True
        Returns the most recently available model whose
        ``available_to_us_at <= available_at`` and which is not deprecated as
        of ``available_at``.

    strict=False
        Returns ``(model_id, is_anachronistic)`` for the latest model in the
        capability class that is not deprecated as of the current UTC time,
        regardless of ``available_at``. ``is_anachronistic`` is True iff that
        model's ``available_to_us_at`` is later than ``available_at``.

    In both modes, candidates that share the same ``available_to_us_at`` are
    ordered by ``model_id`` so the choice is deterministic across runs and
    database backends.

    Args:
        capability_class: Capability class to select within.
        available_at: The instant the caller is reasoning about.
        strict: Selection mode, see above.
        engine: Engine to query through; ``make_engine()`` is used when omitted.

    Raises:
        NoEligibleModelError: If no registry entry matches in the chosen mode.
    """
    eng = engine if engine is not None else make_engine()

    # Strict mode judges deprecation at the caller's instant; non-strict mode
    # judges it at "now" because it asks for the currently active model.
    as_of = available_at if strict else datetime.now(timezone.utc)
    stmt = (
        select(ModelRegistryEntry)
        .where(ModelRegistryEntry.capability_class == capability_class)
        .where(_not_deprecated_as_of(as_of))
    )
    if strict:
        stmt = stmt.where(ModelRegistryEntry.available_to_us_at <= available_at)
    # Secondary key breaks ties; without it the winner among equal timestamps
    # is whatever row order the database happens to return.
    stmt = stmt.order_by(
        ModelRegistryEntry.available_to_us_at.desc(),
        ModelRegistryEntry.model_id.asc(),
    )

    with Session(eng) as sess:
        entry = sess.scalars(stmt).first()
        if entry is None:
            if strict:
                raise NoEligibleModelError(
                    f"no model registered for capability_class={capability_class!r} "
                    f"available at {available_at.isoformat()} (strict mode)"
                )
            raise NoEligibleModelError(
                f"no active model registered for capability_class={capability_class!r}"
            )
        if strict:
            return entry.model_id
        is_anachronistic = entry.available_to_us_at > available_at
        return entry.model_id, is_anachronistic
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Prompt template registry — Phase 0 backend is YAML files in a directory.
|
|
2
|
+
|
|
3
|
+
Per ROADMAP §3.1, prompt_registry in Phase 0 is filesystem-backed (git-tracked)
|
|
4
|
+
rather than DB-backed. The contract is the same set of MUST fields from
|
|
5
|
+
SPEC §3.3; only the backend differs.
|
|
6
|
+
|
|
7
|
+
Directory layout (relative to prompts_root, default ``prompts/`` at CWD):
|
|
8
|
+
|
|
9
|
+
prompts/
|
|
10
|
+
└── <project>/
|
|
11
|
+
└── <component>/
|
|
12
|
+
└── v1.yaml
|
|
13
|
+
└── v2.yaml
|
|
14
|
+
|
|
15
|
+
Each YAML file:
|
|
16
|
+
|
|
17
|
+
template_body: |
|
|
18
|
+
... prompt content with {variables} ...
|
|
19
|
+
template_format: jinja2 | fstring | raw
|
|
20
|
+
introduced_at: 2026-05-18T00:00:00+00:00
|
|
21
|
+
expected_output_schema: null # optional, JSON-Schema-style dict
|
|
22
|
+
superseded_at: null
|
|
23
|
+
superseded_by: null
|
|
24
|
+
notes: null
|
|
25
|
+
|
|
26
|
+
``prompt_template_id`` is derived from the path: ``<project>/<component>/v<N>``.
|
|
27
|
+
``prompt_template_hash`` is SHA-256 of ``template_body`` as bytes.
|
|
28
|
+
"""
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import hashlib
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
from dataclasses import dataclass
|
|
35
|
+
from datetime import datetime
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Any
|
|
38
|
+
|
|
39
|
+
import yaml
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Shape of a valid prompt_template_id: "<project>/<component>/v<N>". The
# project and component segments accept lowercase ASCII letters, digits, "_"
# and "-"; <N> is one or more digits.
_TEMPLATE_ID_RE = re.compile(r"^(?P<project>[a-z0-9_-]+)/(?P<component>[a-z0-9_-]+)/v\d+$")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class PromptTemplate:
    """Immutable in-memory record of one prompt template.

    The first five fields are required; the remaining ones default to
    ``None``. ``template_format`` selects how ``render`` interprets
    ``template_body``.
    """

    prompt_template_id: str
    prompt_template_hash: str
    template_body: str
    template_format: str
    introduced_at: datetime
    expected_output_schema: dict[str, Any] | None = None
    superseded_at: datetime | None = None
    superseded_by: str | None = None
    notes: str | None = None

    def render(self, **variables: Any) -> str:
        """Produce the final prompt text from ``template_body``.

        - ``raw``: the body is returned verbatim; passing variables is an error.
        - ``fstring``: the body is expanded with ``str.format(**variables)``.
        - ``jinja2``: the body is rendered through ``jinja2.Template``.

        Raises:
            ValueError: For a ``raw`` template given variables, or for an
                unrecognised ``template_format``.
            ImportError: For ``jinja2`` templates when jinja2 is not installed.
        """
        fmt = self.template_format
        body = self.template_body

        if fmt == "raw":
            if not variables:
                return body
            raise ValueError("raw template does not accept variables")

        if fmt == "fstring":
            return body.format(**variables)

        if fmt != "jinja2":
            raise ValueError(
                f"unknown template_format={self.template_format!r}; "
                "expected one of jinja2 | fstring | raw"
            )

        # jinja2 is imported lazily so the other formats work without it.
        try:
            import jinja2
        except ImportError as exc:  # pragma: no cover - optional dep
            raise ImportError(
                "template_format=jinja2 requires the 'jinja2' package; "
                "install with `pip install jinja2`"
            ) from exc
        return jinja2.Template(body).render(**variables)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _prompts_root(explicit: str | os.PathLike[str] | None) -> Path:
|
|
81
|
+
if explicit is not None:
|
|
82
|
+
return Path(explicit).expanduser().resolve()
|
|
83
|
+
env = os.environ.get("TRACEGUARD_PROMPTS_DIR")
|
|
84
|
+
if env:
|
|
85
|
+
return Path(env).expanduser().resolve()
|
|
86
|
+
return Path.cwd() / "prompts"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _parse_aware_datetime(raw: Any, *, field: str, template_id: str) -> datetime:
    """Coerce a YAML timestamp value to a ``datetime`` and require a timezone."""
    parsed = raw if isinstance(raw, datetime) else datetime.fromisoformat(str(raw))
    if parsed.tzinfo is None:
        raise ValueError(
            f"prompt template {template_id!r}: {field} must be timezone-aware"
        )
    return parsed


def load_prompt(
    template_id: str,
    *,
    prompts_root: str | os.PathLike[str] | None = None,
) -> PromptTemplate:
    """Load a prompt template by its id (``<project>/<component>/v<N>``).

    The template is read from ``<root>/<template_id>.yaml``, where the root is
    resolved by ``_prompts_root``. ``prompt_template_hash`` is the SHA-256 hex
    digest of the UTF-8 encoded ``template_body``.

    Args:
        template_id: Identifier matching ``_TEMPLATE_ID_RE`` in full.
        prompts_root: Optional override of the prompts directory.

    Returns:
        The parsed ``PromptTemplate``.

    Raises:
        ValueError: If the id is malformed, the YAML document is not a
            mapping, ``template_body`` is missing or not a string,
            ``template_format`` is unknown, ``introduced_at`` is missing, or
            either timestamp is unparseable or lacks a timezone.
        FileNotFoundError: If the template file does not exist.
    """
    # fullmatch, not match: "$" also matches just before a trailing newline,
    # which would let "proj/comp/v1\n" through as a valid id.
    if not _TEMPLATE_ID_RE.fullmatch(template_id):
        raise ValueError(
            f"invalid prompt_template_id {template_id!r}; "
            "expected '<project>/<component>/v<N>' with snake-case names"
        )
    root = _prompts_root(prompts_root)
    path = root / f"{template_id}.yaml"
    if not path.is_file():
        raise FileNotFoundError(f"prompt template not found: {path}")
    with path.open("r", encoding="utf-8") as fp:
        data = yaml.safe_load(fp) or {}
    # A top-level list or scalar would otherwise fail on ``.get`` with an
    # AttributeError that says nothing about the offending file.
    if not isinstance(data, dict):
        raise ValueError(
            f"prompt template {template_id!r}: expected a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )

    body = data.get("template_body")
    if not isinstance(body, str):
        raise ValueError(f"prompt template {template_id!r} missing string 'template_body'")
    fmt = data.get("template_format", "raw")
    if fmt not in {"jinja2", "fstring", "raw"}:
        raise ValueError(
            f"prompt template {template_id!r}: invalid template_format={fmt!r}; "
            "expected jinja2 | fstring | raw"
        )
    introduced_raw = data.get("introduced_at")
    if introduced_raw is None:
        raise ValueError(f"prompt template {template_id!r} missing required 'introduced_at'")
    introduced_at = _parse_aware_datetime(
        introduced_raw, field="introduced_at", template_id=template_id
    )

    # superseded_at is optional, but when present it gets the same
    # timezone-awareness check as introduced_at so the two stay comparable.
    superseded_raw = data.get("superseded_at")
    superseded_at: datetime | None = None
    if superseded_raw is not None:
        superseded_at = _parse_aware_datetime(
            superseded_raw, field="superseded_at", template_id=template_id
        )

    return PromptTemplate(
        prompt_template_id=template_id,
        prompt_template_hash=hashlib.sha256(body.encode("utf-8")).hexdigest(),
        template_body=body,
        template_format=fmt,
        introduced_at=introduced_at,
        expected_output_schema=data.get("expected_output_schema"),
        superseded_at=superseded_at,
        superseded_by=data.get("superseded_by"),
        notes=data.get("notes"),
    )
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Canonical input normalization for SHA-256 hashing (SPEC §4.4).
|
|
2
|
+
|
|
3
|
+
The normalization function is the single authoritative source for input_hash.
|
|
4
|
+
Business code MUST NOT compute hashes by other means; doing so breaks dedup
|
|
5
|
+
and replay consistency.
|
|
6
|
+
|
|
7
|
+
Rules (SPEC §4.4):
|
|
8
|
+
- dict: keys sorted, JSON serialized with separators=(",", ":"),
|
|
9
|
+
ensure_ascii=False
|
|
10
|
+
- str: leading/trailing whitespace stripped, all line endings → "\\n"
|
|
11
|
+
- float: serialized via Python's json (shortest repr, stable across versions);
|
|
12
|
+
NaN / Inf raise (allow_nan=False) — business code must convert first
|
|
13
|
+
- bytes: base64-encoded into a marker dict {"__bytes_b64__": "..."}
|
|
14
|
+
- datetime: ISO 8601 with timezone (naive datetime → raises)
|
|
15
|
+
- Decimal: serialized via str() to preserve precision
|
|
16
|
+
- pydantic BaseModel: dumped via model_dump() then normalized
|
|
17
|
+
- list / tuple: order preserved, elements normalized recursively
|
|
18
|
+
- unknown types: str() fallback with a UserWarning
|
|
19
|
+
|
|
20
|
+
Changing this algorithm is a SPEC-major bump (SPEC §6.1) because it
|
|
21
|
+
invalidates historical input_hash comparisons.
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import base64
|
|
26
|
+
import hashlib
|
|
27
|
+
import json
|
|
28
|
+
import warnings
|
|
29
|
+
from datetime import datetime
|
|
30
|
+
from decimal import Decimal
|
|
31
|
+
from typing import Any
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
from pydantic import BaseModel as _PydanticBaseModel
|
|
35
|
+
except ImportError: # pragma: no cover - pydantic is a required dep, but keep guard
|
|
36
|
+
_PydanticBaseModel = None # type: ignore[assignment, misc]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Key of the single-entry marker dict that stands in for a ``bytes`` value in
# the normalized structure; the dict's value is the base64 text of the bytes.
_BYTES_MARKER = "__bytes_b64__"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _normalize(value: Any) -> Any:
|
|
43
|
+
if value is None:
|
|
44
|
+
return None
|
|
45
|
+
if isinstance(value, bool):
|
|
46
|
+
return value
|
|
47
|
+
if isinstance(value, int):
|
|
48
|
+
return value
|
|
49
|
+
if isinstance(value, float):
|
|
50
|
+
# json.dumps with allow_nan=False will raise on NaN/Inf below.
|
|
51
|
+
return value
|
|
52
|
+
if isinstance(value, str):
|
|
53
|
+
# Unify line endings then strip outer whitespace.
|
|
54
|
+
cleaned = value.replace("\r\n", "\n").replace("\r", "\n")
|
|
55
|
+
return cleaned.strip()
|
|
56
|
+
if isinstance(value, bytes):
|
|
57
|
+
return {_BYTES_MARKER: base64.b64encode(value).decode("ascii")}
|
|
58
|
+
if isinstance(value, datetime):
|
|
59
|
+
if value.tzinfo is None:
|
|
60
|
+
raise ValueError(
|
|
61
|
+
"normalize_input: naive datetime not supported "
|
|
62
|
+
"(attach a timezone, e.g. datetime.now(timezone.utc))"
|
|
63
|
+
)
|
|
64
|
+
return value.isoformat()
|
|
65
|
+
if isinstance(value, Decimal):
|
|
66
|
+
return str(value)
|
|
67
|
+
if isinstance(value, dict):
|
|
68
|
+
return {str(k): _normalize(v) for k, v in value.items()}
|
|
69
|
+
if isinstance(value, (list, tuple)):
|
|
70
|
+
return [_normalize(v) for v in value]
|
|
71
|
+
if _PydanticBaseModel is not None and isinstance(value, _PydanticBaseModel):
|
|
72
|
+
return _normalize(value.model_dump())
|
|
73
|
+
warnings.warn(
|
|
74
|
+
f"normalize_input: unsupported type {type(value).__name__!r}, "
|
|
75
|
+
"falling back to str(); hash stability is not guaranteed across versions",
|
|
76
|
+
UserWarning,
|
|
77
|
+
stacklevel=3,
|
|
78
|
+
)
|
|
79
|
+
return str(value)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def normalize_input(data: Any) -> bytes:
    """Serialize ``data`` to its canonical UTF-8 byte representation.

    The value is first passed through ``_normalize`` and then dumped as
    compact JSON with sorted keys, so equal inputs — including dicts that
    differ only in key order — produce the same bytes. NaN and infinite
    floats are rejected by ``json.dumps`` with ``ValueError``.
    """
    canonical_text = json.dumps(
        _normalize(data),
        allow_nan=False,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    )
    return canonical_text.encode("utf-8")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def input_hash(data: Any) -> str:
    """Compute the SHA-256 hex digest of the canonical bytes of ``data``."""
    canonical_bytes = normalize_input(data)
    return hashlib.sha256(canonical_bytes).hexdigest()
|
traceguard/sdk/tracer.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Tracer SDK — decorator + context manager (SPEC §4.1).
|
|
2
|
+
|
|
3
|
+
Phase 0 ships sync-only instrumentation. The two entry points share a single
|
|
4
|
+
``Span`` object that accumulates state and flushes one row to ``traces`` on
|
|
5
|
+
exit (success or failure).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import time
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from decimal import Decimal
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from typing import Any, Callable, Iterator
|
|
16
|
+
|
|
17
|
+
from sqlalchemy.engine import Engine
|
|
18
|
+
from sqlalchemy.orm import Session
|
|
19
|
+
|
|
20
|
+
from traceguard.sdk.normalizer import input_hash
|
|
21
|
+
from traceguard.store.models import Trace, make_engine
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Maximum number of characters kept by ``_summarize`` and by the ``repr()``
# fallback in ``_to_jsonable``.
_INPUT_SUMMARY_MAX = 500
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _summarize(data: Any) -> str | None:
    """Return a truncated text preview of ``data``, or ``None`` for ``None``.

    Strings are used as-is and everything else goes through ``repr``; the
    result is cut to ``_INPUT_SUMMARY_MAX`` characters.
    """
    if data is None:
        return None
    if isinstance(data, str):
        return data[:_INPUT_SUMMARY_MAX]
    return repr(data)[:_INPUT_SUMMARY_MAX]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _to_jsonable(value: Any) -> Any:
|
|
35
|
+
"""Best-effort coerce to a JSON-compatible structure for output_parsed."""
|
|
36
|
+
if value is None or isinstance(value, (bool, int, float, str)):
|
|
37
|
+
return value
|
|
38
|
+
if isinstance(value, dict):
|
|
39
|
+
return {str(k): _to_jsonable(v) for k, v in value.items()}
|
|
40
|
+
if isinstance(value, (list, tuple)):
|
|
41
|
+
return [_to_jsonable(v) for v in value]
|
|
42
|
+
dump = getattr(value, "model_dump", None) # pydantic v2
|
|
43
|
+
if callable(dump):
|
|
44
|
+
try:
|
|
45
|
+
return _to_jsonable(dump())
|
|
46
|
+
except Exception: # noqa: BLE001 - best effort
|
|
47
|
+
pass
|
|
48
|
+
try:
|
|
49
|
+
json.dumps(value)
|
|
50
|
+
return value
|
|
51
|
+
except (TypeError, ValueError):
|
|
52
|
+
return repr(value)[:_INPUT_SUMMARY_MAX]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Span:
    """Accumulator for one trace row. Created by ``Tracer.span`` / ``Tracer.trace``.

    The ``record_*`` methods only update in-memory state; ``_flush`` writes a
    single ``Trace`` row and stores the generated key in ``trace_id``.
    """

    def __init__(
        self,
        *,
        project: str,
        component: str,
        operation: str,
        engine: Engine,
        correlation_id: str | None = None,
        feature_as_of: datetime | None = None,
        parent_trace_id: int | None = None,
    ) -> None:
        # Identity and linkage supplied by the caller.
        self.project = project
        self.component = component
        self.operation = operation
        self.correlation_id = correlation_id
        self.feature_as_of = feature_as_of
        self.parent_trace_id = parent_trace_id

        # Persistence target and bookkeeping.
        self._engine = engine
        self._start_perf: float = time.perf_counter()  # origin for default latency
        self._committed = False

        # Everything below stays None until a record_* call, or _flush, sets it.
        self._input_hash: str | None = None
        self._input_summary: str | None = None
        self._model_id: str | None = None
        self._prompt_template_id: str | None = None
        self._prompt_template_hash: str | None = None
        self._output_parsed: Any = None
        self._parse_status: str | None = None
        self._latency_ms: int | None = None
        self._tokens_in: int | None = None
        self._tokens_out: int | None = None
        self._cost_usd: Decimal | None = None
        self._error_class: str | None = None
        self._error_message: str | None = None

        # Set by _flush once the row has been committed.
        self.trace_id: int | None = None

    def record_input(self, data: Any) -> None:
        """Store the canonical hash and a truncated summary of ``data``."""
        digest = input_hash(data)
        self._input_hash = digest
        self._input_summary = _summarize(data)

    def record_model_prompt(
        self,
        *,
        model_id: str | None = None,
        prompt_template_id: str | None = None,
        prompt_template_hash: str | None = None,
    ) -> None:
        """Store model and prompt identifiers; ``None`` leaves a field untouched."""
        supplied = {
            "_model_id": model_id,
            "_prompt_template_id": prompt_template_id,
            "_prompt_template_hash": prompt_template_hash,
        }
        for attribute, new_value in supplied.items():
            if new_value is not None:
                setattr(self, attribute, new_value)

    def record_output(
        self,
        *,
        parsed: Any = None,
        parse_status: str = "success",
    ) -> None:
        """Store the parsed output (coerced to JSON-compatible form) and its status.

        Raises:
            ValueError: If ``parse_status`` is not success, partial or failed.
        """
        allowed_statuses = {"success", "partial", "failed"}
        if parse_status not in allowed_statuses:
            raise ValueError(
                f"parse_status must be one of success | partial | failed, got {parse_status!r}"
            )
        self._output_parsed = None if parsed is None else _to_jsonable(parsed)
        self._parse_status = parse_status

    def record_perf(
        self,
        *,
        latency_ms: int | None = None,
        tokens_in: int | None = None,
        tokens_out: int | None = None,
        cost_usd: float | Decimal | None = None,
    ) -> None:
        """Store performance figures; ``None`` leaves a field untouched.

        Counts are coerced with ``int``. The cost goes through ``str`` before
        ``Decimal`` so a float keeps its printed value, not its binary expansion.
        """
        integer_fields = (
            ("_latency_ms", latency_ms),
            ("_tokens_in", tokens_in),
            ("_tokens_out", tokens_out),
        )
        for attribute, raw in integer_fields:
            if raw is not None:
                setattr(self, attribute, int(raw))
        if cost_usd is not None:
            self._cost_usd = Decimal(str(cost_usd))

    def record_error(self, exc: BaseException) -> None:
        """Store the exception's class name and message.

        ``parse_status`` becomes ``"failed"`` only if nothing was recorded yet.
        """
        self._error_class = type(exc).__name__
        self._error_message = str(exc)
        if self._parse_status is None:
            self._parse_status = "failed"

    def _flush(self) -> None:
        """Write the accumulated state as one ``Trace`` row; later calls are no-ops."""
        if self._committed:
            return

        # Fill in whatever the caller never recorded.
        if self._input_hash is None:
            self._input_hash = input_hash(None)
        if self._parse_status is None:
            self._parse_status = "failed" if self._error_class is not None else "success"
        if self._latency_ms is None:
            elapsed_seconds = time.perf_counter() - self._start_perf
            self._latency_ms = int(elapsed_seconds * 1000)

        columns = {
            "project": self.project,
            "component": self.component,
            "operation": self.operation,
            "parent_trace_id": self.parent_trace_id,
            "correlation_id": self.correlation_id,
            "input_hash": self._input_hash,
            "input_summary": self._input_summary,
            "model_id": self._model_id,
            "prompt_template_id": self._prompt_template_id,
            "prompt_template_hash": self._prompt_template_hash,
            "output_parsed": self._output_parsed,
            "parse_status": self._parse_status,
            "latency_ms": self._latency_ms,
            "tokens_in": self._tokens_in,
            "tokens_out": self._tokens_out,
            "cost_usd": self._cost_usd,
            "feature_as_of": self.feature_as_of,
            # NOTE(review): stamped at flush time, i.e. when the span ends, not
            # when it started — confirm this is the intended meaning of invoked_at.
            "invoked_at": datetime.now(timezone.utc),
            "error_class": self._error_class,
            "error_message": self._error_message,
        }
        row = Trace(**columns)
        with Session(self._engine) as session:
            session.add(row)
            session.commit()
            # Read the key while the session is still open.
            self.trace_id = row.trace_id
        self._committed = True
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class Tracer:
    """Holds the persistence engine and emits ``Span`` objects.

    ``span`` is the context-manager entry point and ``trace`` the decorator
    entry point; both persist exactly one row per invocation through
    ``Span._flush``.
    """

    def __init__(self, engine: Engine | None = None) -> None:
        self._engine = engine

    @property
    def engine(self) -> Engine:
        """The engine in use; created with ``make_engine()`` on first access if unset."""
        if self._engine is None:
            self._engine = make_engine()
        return self._engine

    def configure(self, engine: Engine) -> None:
        """Override the engine — useful for tests."""
        self._engine = engine

    @staticmethod
    def _warn_capture_failure(stage: str, fn: Callable[..., Any], exc: BaseException) -> None:
        """Report that auto-recording failed without disturbing the traced call."""
        import warnings  # local import: only needed on this rare path

        fn_name = getattr(fn, "__qualname__", repr(fn))
        warnings.warn(
            f"traceguard: could not auto-record {stage} of {fn_name}: "
            f"{type(exc).__name__}: {exc}",
            RuntimeWarning,
            stacklevel=3,  # attribute the warning to the caller of the decorated function
        )

    @contextmanager
    def span(
        self,
        project: str,
        component: str,
        operation: str,
        *,
        correlation_id: str | None = None,
        feature_as_of: datetime | None = None,
        parent_trace_id: int | None = None,
    ) -> Iterator[Span]:
        """Yield a ``Span`` and flush it when the ``with`` body finishes.

        If the body raises, the exception is recorded on the span, the span is
        flushed, and the exception is re-raised unchanged.
        """
        span = Span(
            project=project,
            component=component,
            operation=operation,
            engine=self.engine,
            correlation_id=correlation_id,
            feature_as_of=feature_as_of,
            parent_trace_id=parent_trace_id,
        )
        try:
            yield span
        except BaseException as exc:
            span.record_error(exc)
            span._flush()
            raise
        else:
            span._flush()

    def trace(
        self,
        project: str,
        component: str,
        operation: str,
        *,
        correlation_from: Callable[..., str] | None = None,
        feature_as_of_from: Callable[..., datetime] | None = None,
    ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
        """Decorator form. Best-effort auto-records (args, kwargs) as input and
        the return value as ``output_parsed``. For finer control use ``span``.

        "Best-effort" is enforced: if recording the input or the output raises
        (for example because an argument cannot be normalized), a
        ``RuntimeWarning`` is emitted and the wrapped function still runs and
        returns its result. Exceptions raised by the wrapped function itself,
        or by ``correlation_from`` / ``feature_as_of_from``, propagate as usual.
        """
        def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
            @wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                corr = correlation_from(*args, **kwargs) if correlation_from else None
                feat = feature_as_of_from(*args, **kwargs) if feature_as_of_from else None
                with self.span(
                    project,
                    component,
                    operation,
                    correlation_id=corr,
                    feature_as_of=feat,
                ) as sp:
                    # Instrumentation must never stop the business call from running.
                    try:
                        sp.record_input({"args": list(args), "kwargs": dict(kwargs)})
                    except Exception as exc:  # noqa: BLE001 - instrumentation boundary
                        self._warn_capture_failure("input", fn, exc)
                    result = fn(*args, **kwargs)
                    # ...nor may it throw away a result that was already computed.
                    try:
                        sp.record_output(parsed=result)
                    except Exception as exc:  # noqa: BLE001 - instrumentation boundary
                        self._warn_capture_failure("output", fn, exc)
                    return result

            return wrapper

        return decorator
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# Shared default instance. It is constructed without an engine, so the
# ``Tracer.engine`` property creates one via ``make_engine()`` on first use.
tracer = Tracer()
"""Module-level default tracer. Configure via ``TRACEGUARD_DB_URL`` env var,
or call ``tracer.configure(engine)`` to inject a custom engine."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Client wrappers — auto-instrumentation for common LLM SDKs."""
|