trajectly 0.3.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. trajectly-0.3.0rc1/LICENSE +21 -0
  2. trajectly-0.3.0rc1/PKG-INFO +88 -0
  3. trajectly-0.3.0rc1/README.md +61 -0
  4. trajectly-0.3.0rc1/pyproject.toml +69 -0
  5. trajectly-0.3.0rc1/setup.cfg +4 -0
  6. trajectly-0.3.0rc1/src/sitecustomize.py +10 -0
  7. trajectly-0.3.0rc1/src/trajectly/__init__.py +5 -0
  8. trajectly-0.3.0rc1/src/trajectly/__main__.py +5 -0
  9. trajectly-0.3.0rc1/src/trajectly/abstraction/__init__.py +20 -0
  10. trajectly-0.3.0rc1/src/trajectly/abstraction/pipeline.py +152 -0
  11. trajectly-0.3.0rc1/src/trajectly/abstraction/predicates.py +68 -0
  12. trajectly-0.3.0rc1/src/trajectly/benchmark.py +78 -0
  13. trajectly-0.3.0rc1/src/trajectly/canonical.py +17 -0
  14. trajectly-0.3.0rc1/src/trajectly/cli.py +312 -0
  15. trajectly-0.3.0rc1/src/trajectly/constants.py +55 -0
  16. trajectly-0.3.0rc1/src/trajectly/contracts.py +570 -0
  17. trajectly-0.3.0rc1/src/trajectly/diff/__init__.py +4 -0
  18. trajectly-0.3.0rc1/src/trajectly/diff/engine.py +212 -0
  19. trajectly-0.3.0rc1/src/trajectly/diff/lcs.py +29 -0
  20. trajectly-0.3.0rc1/src/trajectly/diff/models.py +29 -0
  21. trajectly-0.3.0rc1/src/trajectly/diff/structural.py +55 -0
  22. trajectly-0.3.0rc1/src/trajectly/engine.py +1090 -0
  23. trajectly-0.3.0rc1/src/trajectly/engine_common.py +76 -0
  24. trajectly-0.3.0rc1/src/trajectly/errors.py +56 -0
  25. trajectly-0.3.0rc1/src/trajectly/events.py +101 -0
  26. trajectly-0.3.0rc1/src/trajectly/fixtures.py +216 -0
  27. trajectly-0.3.0rc1/src/trajectly/normalize/__init__.py +19 -0
  28. trajectly-0.3.0rc1/src/trajectly/normalize/canonical.py +115 -0
  29. trajectly-0.3.0rc1/src/trajectly/normalize/version.py +5 -0
  30. trajectly-0.3.0rc1/src/trajectly/plugins/__init__.py +5 -0
  31. trajectly-0.3.0rc1/src/trajectly/plugins/cloud_exporter.py +127 -0
  32. trajectly-0.3.0rc1/src/trajectly/plugins/interfaces.py +24 -0
  33. trajectly-0.3.0rc1/src/trajectly/plugins/loader.py +39 -0
  34. trajectly-0.3.0rc1/src/trajectly/redaction.py +31 -0
  35. trajectly-0.3.0rc1/src/trajectly/refinement/__init__.py +10 -0
  36. trajectly-0.3.0rc1/src/trajectly/refinement/checker.py +167 -0
  37. trajectly-0.3.0rc1/src/trajectly/refinement/skeleton.py +33 -0
  38. trajectly-0.3.0rc1/src/trajectly/replay_guard.py +267 -0
  39. trajectly-0.3.0rc1/src/trajectly/report/__init__.py +3 -0
  40. trajectly-0.3.0rc1/src/trajectly/report/renderers.py +91 -0
  41. trajectly-0.3.0rc1/src/trajectly/report/schema.py +112 -0
  42. trajectly-0.3.0rc1/src/trajectly/runtime.py +123 -0
  43. trajectly-0.3.0rc1/src/trajectly/schema.py +171 -0
  44. trajectly-0.3.0rc1/src/trajectly/sdk/__init__.py +71 -0
  45. trajectly-0.3.0rc1/src/trajectly/sdk/adapters.py +344 -0
  46. trajectly-0.3.0rc1/src/trajectly/sdk/context.py +450 -0
  47. trajectly-0.3.0rc1/src/trajectly/shrink/__init__.py +3 -0
  48. trajectly-0.3.0rc1/src/trajectly/shrink/ddmin.py +85 -0
  49. trajectly-0.3.0rc1/src/trajectly/specs/__init__.py +70 -0
  50. trajectly-0.3.0rc1/src/trajectly/specs/compat_v02.py +316 -0
  51. trajectly-0.3.0rc1/src/trajectly/specs/migrate.py +102 -0
  52. trajectly-0.3.0rc1/src/trajectly/specs/v03.py +394 -0
  53. trajectly-0.3.0rc1/src/trajectly/trace/__init__.py +26 -0
  54. trajectly-0.3.0rc1/src/trajectly/trace/io.py +70 -0
  55. trajectly-0.3.0rc1/src/trajectly/trace/meta.py +20 -0
  56. trajectly-0.3.0rc1/src/trajectly/trace/models.py +76 -0
  57. trajectly-0.3.0rc1/src/trajectly/trace/validate.py +81 -0
  58. trajectly-0.3.0rc1/src/trajectly/trt/__init__.py +8 -0
  59. trajectly-0.3.0rc1/src/trajectly/trt/runner.py +316 -0
  60. trajectly-0.3.0rc1/src/trajectly/trt/types.py +18 -0
  61. trajectly-0.3.0rc1/src/trajectly/trt/witness.py +54 -0
  62. trajectly-0.3.0rc1/src/trajectly.egg-info/PKG-INFO +88 -0
  63. trajectly-0.3.0rc1/src/trajectly.egg-info/SOURCES.txt +65 -0
  64. trajectly-0.3.0rc1/src/trajectly.egg-info/dependency_links.txt +1 -0
  65. trajectly-0.3.0rc1/src/trajectly.egg-info/entry_points.txt +2 -0
  66. trajectly-0.3.0rc1/src/trajectly.egg-info/requires.txt +9 -0
  67. trajectly-0.3.0rc1/src/trajectly.egg-info/top_level.txt +2 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Trajectly
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.4
2
+ Name: trajectly
3
+ Version: 0.3.0rc1
4
+ Summary: Trajectory Refinement Testing (TRT) for deterministic agent CI
5
+ Author-email: Ahmed Ashmawy <awashmawy@proton.me>
6
+ License: MIT
7
+ Keywords: agents,testing,regression,llm,ci
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Testing
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: typer<1,>=0.12
19
+ Requires-Dist: PyYAML<7,>=6
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest<9,>=8; extra == "dev"
22
+ Requires-Dist: pytest-cov<6,>=5; extra == "dev"
23
+ Requires-Dist: ruff<1,>=0.6; extra == "dev"
24
+ Requires-Dist: mypy<2,>=1.11; extra == "dev"
25
+ Requires-Dist: types-PyYAML<7,>=6.0; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # Trajectly
29
+
30
+ Deterministic regression testing for AI agents. Record a baseline, enforce contracts, catch regressions before they ship.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install trajectly
36
+ ```
37
+
38
+ ## 30-Second Example
39
+
40
+ Trajectly works in three steps: **record** a known-good baseline, **run** against it later, and **get a verdict**.
41
+
42
+ ```bash
43
+ # Clone the repo and install dev dependencies
44
+ git clone https://github.com/trajectly/trajectly.git
45
+ cd trajectly
46
+ pip install -e ".[dev]"
47
+
48
+ # Set your OpenAI key (the example calls gpt-4o-mini)
49
+ export OPENAI_API_KEY="sk-..."
50
+
51
+ # 1. Record the baseline
52
+ cd examples
53
+ trajectly record specs/trt-support-triage-baseline.agent.yaml
54
+
55
+ # 2. Run the regression variant against it
56
+ trajectly run specs/trt-support-triage-regression.agent.yaml
57
+
58
+ # 3. See what broke
59
+ trajectly report
60
+ ```
61
+
62
+ The report shows exactly **which step** failed, **why** (the regression calls `unsafe_export`, which is denied by policy), and gives you a **deterministic repro command**.
63
+
64
+ ## How It Works
65
+
66
+ 1. **Record** -- run your agent normally. Trajectly captures every tool call and LLM response as a trace.
67
+ 2. **Replay** -- re-run the agent. Trajectly replays recorded LLM responses from fixtures so results are deterministic.
68
+ 3. **Compare** -- Trajectly checks the new trace against the baseline:
69
+ - **Contracts**: are only allowed tools called? Are denied tools blocked?
70
+ - **Refinement**: does the new call sequence preserve the baseline sequence?
71
+ 4. **Verdict** -- PASS or FAIL with the exact failure step (witness index), violation code, and a copy-paste repro command.
72
+
73
+ ## Examples
74
+
75
+ | Example | Provider | Tools | What it tests |
76
+ |---------|----------|-------|---------------|
77
+ | [Ticket Classifier](docs/tutorial-support-triage.md) | OpenAI | `fetch_ticket`, `store_triage` | Simple 2-tool agent with contract enforcement |
78
+ | [Code Review Bot](docs/tutorial-code-review-bot.md) | Gemini | `fetch_pr`, `lint_code`, `post_review` | Multi-tool sequence with policy guardrails |
79
+
80
+ ## Documentation
81
+
82
+ - [Full documentation](docs/trajectly.md) -- concepts, CLI reference, spec format, SDK reference
83
+ - [Tutorial: Ticket Classifier](docs/tutorial-support-triage.md) -- step-by-step simple example
84
+ - [Tutorial: Code Review Bot](docs/tutorial-code-review-bot.md) -- step-by-step medium example
85
+
86
+ ## License
87
+
88
+ MIT
@@ -0,0 +1,61 @@
1
+ # Trajectly
2
+
3
+ Deterministic regression testing for AI agents. Record a baseline, enforce contracts, catch regressions before they ship.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install trajectly
9
+ ```
10
+
11
+ ## 30-Second Example
12
+
13
+ Trajectly works in three steps: **record** a known-good baseline, **run** against it later, and **get a verdict**.
14
+
15
+ ```bash
16
+ # Clone the repo and install dev dependencies
17
+ git clone https://github.com/trajectly/trajectly.git
18
+ cd trajectly
19
+ pip install -e ".[dev]"
20
+
21
+ # Set your OpenAI key (the example calls gpt-4o-mini)
22
+ export OPENAI_API_KEY="sk-..."
23
+
24
+ # 1. Record the baseline
25
+ cd examples
26
+ trajectly record specs/trt-support-triage-baseline.agent.yaml
27
+
28
+ # 2. Run the regression variant against it
29
+ trajectly run specs/trt-support-triage-regression.agent.yaml
30
+
31
+ # 3. See what broke
32
+ trajectly report
33
+ ```
34
+
35
+ The report shows exactly **which step** failed, **why** (the regression calls `unsafe_export`, which is denied by policy), and gives you a **deterministic repro command**.
36
+
37
+ ## How It Works
38
+
39
+ 1. **Record** -- run your agent normally. Trajectly captures every tool call and LLM response as a trace.
40
+ 2. **Replay** -- re-run the agent. Trajectly replays recorded LLM responses from fixtures so results are deterministic.
41
+ 3. **Compare** -- Trajectly checks the new trace against the baseline:
42
+ - **Contracts**: are only allowed tools called? Are denied tools blocked?
43
+ - **Refinement**: does the new call sequence preserve the baseline sequence?
44
+ 4. **Verdict** -- PASS or FAIL with the exact failure step (witness index), violation code, and a copy-paste repro command.
45
+
46
+ ## Examples
47
+
48
+ | Example | Provider | Tools | What it tests |
49
+ |---------|----------|-------|---------------|
50
+ | [Ticket Classifier](docs/tutorial-support-triage.md) | OpenAI | `fetch_ticket`, `store_triage` | Simple 2-tool agent with contract enforcement |
51
+ | [Code Review Bot](docs/tutorial-code-review-bot.md) | Gemini | `fetch_pr`, `lint_code`, `post_review` | Multi-tool sequence with policy guardrails |
52
+
53
+ ## Documentation
54
+
55
+ - [Full documentation](docs/trajectly.md) -- concepts, CLI reference, spec format, SDK reference
56
+ - [Tutorial: Ticket Classifier](docs/tutorial-support-triage.md) -- step-by-step simple example
57
+ - [Tutorial: Code Review Bot](docs/tutorial-code-review-bot.md) -- step-by-step intermediate example
58
+
59
+ ## License
60
+
61
+ MIT
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "trajectly"
7
+ version = "0.3.0rc1"
8
+ description = "Trajectory Refinement Testing (TRT) for deterministic agent CI"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ {name = "Ahmed Ashmawy", email = "awashmawy@proton.me"}
14
+ ]
15
+ keywords = ["agents", "testing", "regression", "llm", "ci"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Software Development :: Testing",
24
+ ]
25
+ dependencies = [
26
+ "typer>=0.12,<1",
27
+ "PyYAML>=6,<7",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "pytest>=8,<9",
33
+ "pytest-cov>=5,<6",
34
+ "ruff>=0.6,<1",
35
+ "mypy>=1.11,<2",
36
+ "types-PyYAML>=6.0,<7",
37
+ ]
38
+
39
+ [project.scripts]
40
+ trajectly = "trajectly.cli:app"
41
+
42
+ [tool.setuptools]
43
+ package-dir = {"" = "src"}
44
+ py-modules = ["sitecustomize"]
45
+
46
+ [tool.setuptools.packages.find]
47
+ where = ["src"]
48
+
49
+ [tool.pytest.ini_options]
50
+ pythonpath = ["src"]
51
+ testpaths = ["tests"]
52
+
53
+ [tool.ruff]
54
+ line-length = 120
55
+ target-version = "py311"
56
+
57
+ [tool.ruff.lint]
58
+ select = ["E", "F", "I", "B", "UP", "N", "RUF"]
59
+ ignore = ["B008"]
60
+
61
+ [tool.mypy]
62
+ python_version = "3.11"
63
+ strict = true
64
+ warn_unused_ignores = true
65
+ warn_redundant_casts = true
66
+ warn_unreachable = true
67
+ show_error_codes = true
68
+ pretty = true
69
+ packages = ["trajectly"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
# Hook for subprocess replays: Python imports `sitecustomize` during
# interpreter start-up, so switching the guard on here installs it before any
# user code gets a chance to run. The guard module is imported lazily so that
# ordinary (non-replay) processes never pay for it.
if os.environ.get("TRAJECTLY_REPLAY_GUARD") == "1":
    from trajectly.replay_guard import activate

    activate()
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ __all__ = ["__version__"]
4
+
5
+ __version__ = "0.3.0rc1"
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from trajectly.cli import app
4
+
5
+ app()
@@ -0,0 +1,20 @@
1
+ from trajectly.abstraction.pipeline import AbstractionConfig, AbstractTrace, Token, build_abstract_trace
2
+ from trajectly.abstraction.predicates import (
3
+ contains_email,
4
+ contains_phone,
5
+ extract_domains,
6
+ extract_numeric_values,
7
+ )
8
+
9
+ # Keep this export surface stable for downstream integrations importing
10
+ # abstraction helpers directly from `trajectly.abstraction`.
11
+ __all__ = [
12
+ "AbstractTrace",
13
+ "AbstractionConfig",
14
+ "Token",
15
+ "build_abstract_trace",
16
+ "contains_email",
17
+ "contains_phone",
18
+ "extract_domains",
19
+ "extract_numeric_values",
20
+ ]
@@ -0,0 +1,152 @@
1
+ """Deterministic abstraction pipeline (Definition 2 in trt_theory.md).
2
+
3
+ Implements ``build_abstract_trace``, which maps a concrete event trace to an
4
+ abstract representation consisting of a token stream and a predicate bag.
5
+
6
+ **Determinism (Theorem 2 precondition):** The pipeline iterates events by
7
+ index, applies a fixed token-mapping function per event type, and accumulates
8
+ predicates in a single pass. Output keys are sorted (``tool_calls_by_name``,
9
+ ``domains``) so the abstract trace is identical for identical inputs regardless
10
+ of Python dict insertion order.
11
+
12
+ **Abstraction homomorphism:** ``alpha(T, c) = (Tokens, Predicates)`` where
13
+ each token preserves the event index and causal kind, and predicates aggregate
14
+ over the full trace.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass, field
20
+ from typing import Any, Literal
21
+
22
+ from trajectly.abstraction.predicates import (
23
+ contains_email,
24
+ contains_phone,
25
+ extract_domains,
26
+ extract_numeric_values,
27
+ )
28
+ from trajectly.events import TraceEvent
29
+
30
# Closed vocabulary of abstract token kinds stored in `Token.kind`.
TokenKind = Literal["CALL", "RESULT", "LLM_REQUEST", "LLM_RESPONSE", "MESSAGE", "OBSERVATION", "ERROR"]
39
+
40
+
41
+ @dataclass(slots=True)
42
+ class Token:
43
+ event_index: int
44
+ kind: TokenKind
45
+ name: str
46
+ payload: dict[str, Any]
47
+
48
+
49
+ @dataclass(slots=True)
50
+ class AbstractionConfig:
51
+ ignore_call_tools: list[str] = field(default_factory=list)
52
+ enable_pii_detection: bool = True
53
+ enable_domain_extraction: bool = True
54
+ enable_numeric_extraction: bool = True
55
+
56
+
57
+ @dataclass(slots=True)
58
+ class AbstractTrace:
59
+ tokens: list[Token]
60
+ predicates: dict[str, Any]
61
+
62
+
63
+ def _token_from_event(event: TraceEvent, event_index: int, ignore_call_tools: set[str]) -> Token | None:
64
+ # Event-to-token mapping is intentionally conservative: only stable,
65
+ # contract-relevant event types feed TRT abstraction.
66
+ payload = dict(event.payload)
67
+ if event.event_type == "tool_called":
68
+ tool_name = str(payload.get("tool_name", "unknown"))
69
+ if tool_name in ignore_call_tools:
70
+ return None
71
+ return Token(event_index=event_index, kind="CALL", name=tool_name, payload=payload)
72
+ if event.event_type == "tool_returned":
73
+ tool_name = str(payload.get("tool_name", "unknown"))
74
+ return Token(event_index=event_index, kind="RESULT", name=tool_name, payload=payload)
75
+ if event.event_type == "llm_called":
76
+ provider = str(payload.get("provider", "unknown"))
77
+ model = str(payload.get("model", "unknown"))
78
+ return Token(event_index=event_index, kind="LLM_REQUEST", name=f"{provider}:{model}", payload=payload)
79
+ if event.event_type == "llm_returned":
80
+ provider = str(payload.get("provider", "unknown"))
81
+ model = str(payload.get("model", "unknown"))
82
+ return Token(event_index=event_index, kind="LLM_RESPONSE", name=f"{provider}:{model}", payload=payload)
83
+ if event.event_type == "agent_step":
84
+ name = str(payload.get("name", "step"))
85
+ return Token(event_index=event_index, kind="MESSAGE", name=name, payload=payload)
86
+ if event.event_type == "run_finished":
87
+ return Token(event_index=event_index, kind="OBSERVATION", name="run_finished", payload=payload)
88
+ return None
89
+
90
+
91
def build_abstract_trace(
    events: list[TraceEvent],
    *,
    config: AbstractionConfig | None = None,
) -> AbstractTrace:
    """Map a concrete event trace to its abstract tokens and predicate bag.

    Args:
        events: Trace events, in order; each event's list index becomes the
            ``event_index`` of its token.
        config: Abstraction switches; a default ``AbstractionConfig`` is used
            when omitted.

    Returns:
        An ``AbstractTrace`` whose ``predicates`` dict always has the keys
        ``tool_calls_total``, ``tool_calls_by_name``, ``domains``, ``pii``,
        ``max_numeric_value`` and ``refund_count``, in that order.
    """
    settings = config or AbstractionConfig()
    skipped_tools = set(settings.ignore_call_tools)

    tokens: list[Token] = []
    for position, event in enumerate(events):
        candidate = _token_from_event(event, position, skipped_tools)
        if candidate is None:
            continue
        tokens.append(candidate)

    total_calls = 0
    calls_per_tool: dict[str, int] = {}
    refunds = 0
    seen_domains: set[str] = set()
    numbers: list[float] = []
    email_found = False
    phone_found = False

    # One deterministic pass over the tokens gathers every aggregate.
    for token in tokens:
        if token.kind == "CALL":
            total_calls += 1
            calls_per_tool[token.name] = calls_per_tool.get(token.name, 0) + 1
            if "refund" in token.name.lower():
                refunds += 1

        if settings.enable_domain_extraction:
            seen_domains.update(extract_domains(token.payload))
        if settings.enable_numeric_extraction:
            numbers.extend(extract_numeric_values(token.payload))
        if settings.enable_pii_detection:
            if not email_found:
                email_found = contains_email(token.payload)
            if not phone_found:
                phone_found = contains_phone(token.payload)

    # Fixed key order and sorted collections keep report payloads stable.
    predicates: dict[str, Any] = {
        "tool_calls_total": total_calls,
        "tool_calls_by_name": dict(sorted(calls_per_tool.items())),
        "domains": sorted(seen_domains),
        "pii": {"email": email_found, "phone": phone_found},
        "max_numeric_value": max(numbers) if numbers else None,
        "refund_count": refunds,
    }
    return AbstractTrace(tokens=tokens, predicates=predicates)
145
+
146
+
147
+ __all__ = [
148
+ "AbstractTrace",
149
+ "AbstractionConfig",
150
+ "Token",
151
+ "build_abstract_trace",
152
+ ]
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Iterable
5
+ from typing import Any
6
+ from urllib.parse import urlparse
7
+
8
# Heuristic detectors, compiled once at import time.
# EMAIL_RE: local-part "@" dotted domain ending in a TLD of two or more letters.
EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
# PHONE_RE: optional "+1"/"1" prefix and optional 3-digit area code followed by
# 3 + 4 digits; note that a bare 7-digit number also matches.
PHONE_RE = re.compile(r"\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}\b")
# URL_RE: http(s) URLs running up to the next whitespace or ")".
URL_RE = re.compile(r"https?://[^\s)]+")
11
+
12
+
13
+ def _walk_strings(value: Any) -> Iterable[str]:
14
+ # Predicate extraction intentionally walks only serializable payload-like
15
+ # shapes to keep abstraction deterministic and side-effect free.
16
+ if isinstance(value, str):
17
+ yield value
18
+ return
19
+ if isinstance(value, dict):
20
+ for item in value.values():
21
+ yield from _walk_strings(item)
22
+ return
23
+ if isinstance(value, list | tuple):
24
+ for item in value:
25
+ yield from _walk_strings(item)
26
+
27
+
28
def contains_email(value: Any) -> bool:
    """Return True if any string nested in ``value`` contains an email-like substring."""
    for text in _walk_strings(value):
        if EMAIL_RE.search(text) is not None:
            return True
    return False
30
+
31
+
32
def contains_phone(value: Any) -> bool:
    """Return True if any string nested in ``value`` contains a phone-like substring."""
    for text in _walk_strings(value):
        if PHONE_RE.search(text) is not None:
            return True
    return False
34
+
35
+
36
def extract_domains(value: Any) -> list[str]:
    """Collect the host names of URLs found in the strings nested in ``value``.

    Each string is parsed both as a whole and via every ``http(s)://`` match
    inside it. Hosts are lower-cased, de-duplicated and returned sorted.

    Payload text is arbitrary (tool output, LLM responses), so candidates that
    ``urlparse`` rejects are skipped rather than allowed to abort the whole
    abstraction pass.

    Args:
        value: Payload-like data (str / dict / list / tuple nesting).

    Returns:
        Sorted list of distinct lower-cased host names.
    """
    domains: set[str] = set()
    for text in _walk_strings(value):
        for candidate in (text, *URL_RE.findall(text)):
            try:
                host = urlparse(candidate).hostname
            except ValueError:
                # e.g. "http://[oops" -> "Invalid IPv6 URL" (unbalanced brackets).
                continue
            if host:
                domains.add(host.lower())
    return sorted(domains)
46
+
47
+
48
+ def extract_numeric_values(value: Any) -> list[float]:
49
+ numbers: list[float] = []
50
+ if isinstance(value, int | float):
51
+ return [float(value)]
52
+ if isinstance(value, dict):
53
+ for item in value.values():
54
+ numbers.extend(extract_numeric_values(item))
55
+ return numbers
56
+ if isinstance(value, list | tuple):
57
+ for item in value:
58
+ numbers.extend(extract_numeric_values(item))
59
+ return numbers
60
+ return numbers
61
+
62
+
63
+ __all__ = [
64
+ "contains_email",
65
+ "contains_phone",
66
+ "extract_domains",
67
+ "extract_numeric_values",
68
+ ]
@@ -0,0 +1,78 @@
1
+ """
2
+ TRT performance benchmark harness (QA-T007).
3
+
4
+ Runs TRT run_specs in a minimal workspace repeatedly and reports wall-clock times.
5
+ Deterministic and offline-safe (replay-only; no network).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import tempfile
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from trajectly.constants import EXIT_SUCCESS
16
+ from trajectly.engine import initialize_workspace, record_specs, run_specs
17
+
18
+
19
+ def _write(path: Path, body: str) -> None:
20
+ path.write_text(body.strip() + "\n", encoding="utf-8")
21
+
22
+
23
def _setup_workspace(root: Path) -> Path:
    """Build a minimal workspace under ``root`` and record its baseline.

    Initializes the workspace, writes a trivial ``agent.py`` and a
    ``bench.agent.yaml`` spec that runs it, then records the spec.

    Args:
        root: Directory that becomes the project root.

    Returns:
        Path of the spec file that was written.

    Raises:
        RuntimeError: If recording does not exit with ``EXIT_SUCCESS``.
    """
    initialize_workspace(root)
    _write(root / "agent.py", "print('ok')")

    spec_path = root / "bench.agent.yaml"
    spec_body = """
schema_version: "0.3"
name: bench
command: python agent.py
workdir: .
strict: true
"""
    _write(spec_path, spec_body)

    recorded = record_specs(targets=[str(spec_path)], project_root=root)
    if recorded.exit_code == EXIT_SUCCESS:
        return spec_path
    raise RuntimeError(f"record_specs failed: {recorded.errors}")
43
+
44
+
45
def run_benchmark(iterations: int = 5) -> dict[str, Any]:
    """Time ``run_specs`` over a freshly recorded temporary workspace.

    Args:
        iterations: Number of timed ``run_specs`` invocations; must be >= 1.

    Returns:
        ``{"runs": [{"wall_s": ...}, ...], "summary": {"n", "mean_s", "min_s",
        "max_s"}}`` with all durations in seconds, rounded to 6 decimals.

    Raises:
        ValueError: If ``iterations`` is less than 1. Checked up front, since
            otherwise a baseline would be recorded only for the summary to
            fail dividing by zero.
        RuntimeError: If recording the baseline or any timed run does not exit
            with ``EXIT_SUCCESS``.
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    times_s: list[float] = []
    with tempfile.TemporaryDirectory(prefix="trajectly_bench_") as tmp:
        root = Path(tmp)
        spec = _setup_workspace(root)
        for _ in range(iterations):
            # Only the run_specs call itself is inside the timed window.
            t0 = time.perf_counter()
            outcome = run_specs(targets=[str(spec)], project_root=root)
            t1 = time.perf_counter()
            if outcome.exit_code != EXIT_SUCCESS:
                raise RuntimeError(f"run_specs failed: {outcome.errors}")
            times_s.append(t1 - t0)

    n = len(times_s)
    return {
        "runs": [{"wall_s": round(t, 6)} for t in times_s],
        "summary": {
            "n": n,
            "mean_s": round(sum(times_s) / n, 6),
            "min_s": round(min(times_s), 6),
            "max_s": round(max(times_s), 6),
        },
    }
68
+
69
+
70
def to_md(data: dict[str, Any]) -> str:
    """Render the ``summary`` section of a benchmark result as short Markdown."""
    summary = data["summary"]
    lines = [
        "## TRT benchmark summary",
        "",
        f"- **Runs:** {summary['n']}",
        f"- **Mean:** {summary['mean_s']} s",
        f"- **Min / Max:** {summary['min_s']} s / {summary['max_s']} s",
    ]
    return "\n".join(lines) + "\n"
@@ -0,0 +1,17 @@
1
+ from trajectly.normalize.canonical import (
2
+ DEFAULT_CANONICAL_NORMALIZER,
3
+ CanonicalNormalizer,
4
+ canonical_dumps,
5
+ normalize_for_json,
6
+ sha256_of_data,
7
+ sha256_of_subset,
8
+ )
9
+
10
+ __all__ = [
11
+ "DEFAULT_CANONICAL_NORMALIZER",
12
+ "CanonicalNormalizer",
13
+ "canonical_dumps",
14
+ "normalize_for_json",
15
+ "sha256_of_data",
16
+ "sha256_of_subset",
17
+ ]