traceredact 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {traceredact-0.2.2 → traceredact-0.2.3}/CHANGELOG.md +9 -1
  2. {traceredact-0.2.2 → traceredact-0.2.3}/PKG-INFO +10 -1
  3. {traceredact-0.2.2 → traceredact-0.2.3}/README.md +9 -0
  4. traceredact-0.2.3/examples/01_basics.py +61 -0
  5. traceredact-0.2.3/examples/02_policy.py +72 -0
  6. traceredact-0.2.3/examples/03_structured_args.py +59 -0
  7. traceredact-0.2.3/examples/04_logging_filter.py +100 -0
  8. traceredact-0.2.3/examples/05_streaming.py +53 -0
  9. traceredact-0.2.3/examples/06_before_db_or_logger.py +47 -0
  10. traceredact-0.2.3/examples/07_openai.py +43 -0
  11. traceredact-0.2.3/examples/08_langchain.py +28 -0
  12. traceredact-0.2.3/examples/09_fastapi_middleware.py +102 -0
  13. traceredact-0.2.3/examples/10_ci_gate.md +51 -0
  14. traceredact-0.2.3/examples/README.md +25 -0
  15. {traceredact-0.2.2 → traceredact-0.2.3}/pyproject.toml +1 -1
  16. {traceredact-0.2.2 → traceredact-0.2.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  17. {traceredact-0.2.2 → traceredact-0.2.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  18. {traceredact-0.2.2 → traceredact-0.2.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  19. {traceredact-0.2.2 → traceredact-0.2.3}/.github/pull_request_template.md +0 -0
  20. {traceredact-0.2.2 → traceredact-0.2.3}/.github/workflows/ci.yml +0 -0
  21. {traceredact-0.2.2 → traceredact-0.2.3}/.github/workflows/release.yml +0 -0
  22. {traceredact-0.2.2 → traceredact-0.2.3}/.gitignore +0 -0
  23. {traceredact-0.2.2 → traceredact-0.2.3}/CONTRIBUTING.md +0 -0
  24. {traceredact-0.2.2 → traceredact-0.2.3}/LICENSE +0 -0
  25. {traceredact-0.2.2 → traceredact-0.2.3}/SECURITY.md +0 -0
  26. {traceredact-0.2.2 → traceredact-0.2.3}/assets/icon.png +0 -0
  27. {traceredact-0.2.2 → traceredact-0.2.3}/assets/icon.svg +0 -0
  28. {traceredact-0.2.2 → traceredact-0.2.3}/assets/logo.png +0 -0
  29. {traceredact-0.2.2 → traceredact-0.2.3}/assets/logo.svg +0 -0
  30. {traceredact-0.2.2 → traceredact-0.2.3}/assets/social-preview.png +0 -0
  31. {traceredact-0.2.2 → traceredact-0.2.3}/assets/social-preview.svg +0 -0
  32. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/__init__.py +0 -0
  33. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/cli.py +0 -0
  34. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/detectors/__init__.py +0 -0
  35. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/detectors/base.py +0 -0
  36. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/detectors/pii.py +0 -0
  37. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/detectors/secrets.py +0 -0
  38. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/engine.py +0 -0
  39. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/integrations/__init__.py +0 -0
  40. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/integrations/anthropic.py +0 -0
  41. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/integrations/langchain.py +0 -0
  42. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/integrations/openai.py +0 -0
  43. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/policy.py +0 -0
  44. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/py.typed +0 -0
  45. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/rules.py +0 -0
  46. {traceredact-0.2.2 → traceredact-0.2.3}/src/traceredact/streaming.py +0 -0
  47. {traceredact-0.2.2 → traceredact-0.2.3}/tests/fixtures/agent_trace.json +0 -0
  48. {traceredact-0.2.2 → traceredact-0.2.3}/tests/fixtures/evasion_trace.json +0 -0
  49. {traceredact-0.2.2 → traceredact-0.2.3}/tests/fixtures/prompt.txt +0 -0
  50. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_cli.py +0 -0
  51. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_detectors.py +0 -0
  52. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_engine.py +0 -0
  53. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_evasion.py +0 -0
  54. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_extras.py +0 -0
  55. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_fixtures.py +0 -0
  56. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_integrations.py +0 -0
  57. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_policy.py +0 -0
  58. {traceredact-0.2.2 → traceredact-0.2.3}/tests/test_streaming.py +0 -0
  59. {traceredact-0.2.2 → traceredact-0.2.3}/traceredact.yml +0 -0
@@ -6,6 +6,13 @@ All notable changes to this project are documented here. The format is based on
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.3] — 2026-06-07
10
+
11
+ ### Added
12
+ - `examples/` — runnable, heavily-commented scenarios: drop-in `logging.Filter`,
13
+ FastAPI/ASGI body-logging middleware, streaming, redacting traces before a
14
+ DB/observability sink, custom policies, SDK wrappers, LangChain, CI gate.
15
+
9
16
  ## [0.2.2] — 2026-06-07
10
17
 
11
18
  ### Changed
@@ -80,7 +87,8 @@ Initial release.
80
87
  - Policy file (`traceredact.yml`): detector toggles, entropy thresholds,
81
88
  allowlist, custom patterns, placeholder template, optional HMAC correlation.
82
89
 
83
- [Unreleased]: https://github.com/traceredact/traceredact/compare/v0.2.2...HEAD
90
+ [Unreleased]: https://github.com/traceredact/traceredact/compare/v0.2.3...HEAD
91
+ [0.2.3]: https://github.com/traceredact/traceredact/compare/v0.2.2...v0.2.3
84
92
  [0.2.2]: https://github.com/traceredact/traceredact/compare/v0.2.1...v0.2.2
85
93
  [0.2.1]: https://github.com/traceredact/traceredact/compare/v0.2.0...v0.2.1
86
94
  [0.2.0]: https://github.com/traceredact/traceredact/compare/v0.1.3...v0.2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: traceredact
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Redact PII and secrets from AI prompts, traces and tool-call arguments before they reach your loggers.
5
5
  Project-URL: Homepage, https://traceredact.com
6
6
  Project-URL: Documentation, https://traceredact.com
@@ -134,6 +134,15 @@ pydantic models, dataclasses and attrs instances are traversed automatically
134
134
  `Policy(decode_payloads=True)` base64-decodes blobs one layer and, if the decoded
135
135
  text contains a high-confidence secret, redacts the whole blob.
136
136
 
137
+ ## Examples
138
+
139
+ Runnable, heavily-commented scenarios live in
140
+ **[`examples/`](https://github.com/traceredact/traceredact/tree/main/examples)**:
141
+ a drop-in [`logging.Filter`](https://github.com/traceredact/traceredact/blob/main/examples/04_logging_filter.py),
142
+ [FastAPI/ASGI middleware](https://github.com/traceredact/traceredact/blob/main/examples/09_fastapi_middleware.py),
143
+ [streaming](https://github.com/traceredact/traceredact/blob/main/examples/05_streaming.py),
144
+ redacting traces before your DB/Langfuse, custom policies, and a CI gate.
145
+
137
146
  ## Policy file (`traceredact.yml`)
138
147
 
139
148
  Drop a `traceredact.yml` in your repo root (auto-discovered) or pass `--policy`:
@@ -97,6 +97,15 @@ pydantic models, dataclasses and attrs instances are traversed automatically
97
97
  `Policy(decode_payloads=True)` base64-decodes blobs one layer and, if the decoded
98
98
  text contains a high-confidence secret, redacts the whole blob.
99
99
 
100
+ ## Examples
101
+
102
+ Runnable, heavily-commented scenarios live in
103
+ **[`examples/`](https://github.com/traceredact/traceredact/tree/main/examples)**:
104
+ a drop-in [`logging.Filter`](https://github.com/traceredact/traceredact/blob/main/examples/04_logging_filter.py),
105
+ [FastAPI/ASGI middleware](https://github.com/traceredact/traceredact/blob/main/examples/09_fastapi_middleware.py),
106
+ [streaming](https://github.com/traceredact/traceredact/blob/main/examples/05_streaming.py),
107
+ redacting traces before your DB/Langfuse, custom policies, and a CI gate.
108
+
100
109
  ## Policy file (`traceredact.yml`)
101
110
 
102
111
  Drop a `traceredact.yml` in your repo root (auto-discovered) or pass `--policy`:
@@ -0,0 +1,61 @@
1
+ """01 · Basics — redact a string and a nested payload, and inspect findings.
2
+
3
+ Run it:
4
+
5
+ pip install traceredact
6
+ python examples/01_basics.py
7
+
8
+ Everything here uses only the core API; no third-party SDKs required.
9
+ """
10
+
11
+ from traceredact import redact
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # 1) Redacting a free-form string.
15
+ #
16
+ # `redact()` returns a RedactionResult with three things you care about:
17
+ # - .value the redacted copy (same shape as the input)
18
+ # - .findings a list of what was found (detector id, category, json_path)
19
+ # - .has_findings True if anything was redacted (handy for CI gates)
20
+ # ---------------------------------------------------------------------------
21
+ text = "email me at alice@acme.io, my key is sk-1234567890abcdefABCDEFGH"
22
+ result = redact(text)
23
+
24
+ print("redacted :", result.value)
25
+ # -> email me at [REDACTED:pii], my key is [REDACTED:secret]
26
+ print("has_findings:", result.has_findings)
27
+ for f in result.findings:
28
+ # `preview` is a non-reversible masked sample, safe to log/print.
29
+ print(f" - {f.detector_id:<22} conf={f.confidence:<4} preview={f.preview}")
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # 2) Redacting a nested structure (e.g. an agent trace / tool-call payload).
34
+ #
35
+ # The engine walks dicts, lists, tuples and sets recursively. The INPUT IS
36
+ # NEVER MUTATED — `result.value` is a redacted copy. Each finding carries a
37
+ # `json_path` so you know exactly where the sensitive value lived.
38
+ # ---------------------------------------------------------------------------
39
+ trace = {
40
+ "user": {"email": "bob@example.com", "plan": "pro"}, # plan is untouched
41
+ "tool_call": {
42
+ "name": "charge_card",
43
+ "arguments": {"card": "4111 1111 1111 1111", "amount": 4200},
44
+ },
45
+ "config": {"openai_api_key": "sk-proj-Tt3kZ9qRsuVwXyZ012345abcd"},
46
+ }
47
+
48
+ result = redact(trace)
49
+
50
+ import json # noqa: E402 (imported here just to pretty-print the example)
51
+
52
+ print("\nredacted trace:")
53
+ print(json.dumps(result.value, indent=2))
54
+
55
+ print("\nfindings by path:")
56
+ for f in result.findings:
57
+ print(f" {f.json_path:<40} -> {f.detector_id}")
58
+
59
+ # Proof that the original object was not mutated:
60
+ assert trace["user"]["email"] == "bob@example.com"
61
+ print("\noriginal input untouched ✓")
@@ -0,0 +1,72 @@
1
+ """02 · Policy — tune what gets redacted and how.
2
+
3
+ A `Policy` is an explicit, inspectable config object. Pass it as the second
4
+ argument to `redact()`. This example walks the knobs you'll actually use.
5
+
6
+ python examples/02_policy.py
7
+ """
8
+
9
+ from traceredact import Policy, redact
10
+
11
+ SAMPLE = "user a@b.com, ip 10.0.0.5, key sk-1234567890abcdefABCDEFGH"
12
+
13
+ # ---------------------------------------------------------------------------
14
+ # 1) Turn OFF the detectors you don't care about (cuts false positives / noise).
15
+ # Detector ids look like "pii.ipv4", "secrets.openai_key", etc.
16
+ # ---------------------------------------------------------------------------
17
+ policy = Policy(disabled_detectors={"pii.ipv4"})
18
+ print("ip kept:", redact(SAMPLE, policy).value)
19
+ # -> the 10.0.0.5 stays; email + key still redacted
20
+
21
+ # ...or run ONLY an explicit allow-set (everything else is skipped):
22
+ only_secrets = Policy(enabled_detectors={"secrets.openai_key"})
23
+ print("only key:", redact(SAMPLE, only_secrets).value)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # 2) Allowlisting — never redact known-safe values (e.g. docs/test fixtures).
28
+ # `allowlist` matches exact strings; `allow_patterns` matches regexes.
29
+ # ---------------------------------------------------------------------------
30
+ policy = Policy(allow_patterns=[r".*@example\.com"])
31
+ print("allowed:", redact("real@gmail.com and demo@example.com", policy).value)
32
+ # -> demo@example.com is kept, real@gmail.com is redacted
33
+
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # 3) Custom placeholder template. `{category}` is substituted per finding.
37
+ # ---------------------------------------------------------------------------
38
+ policy = Policy(placeholder="«redacted {category}»")
39
+ print("placeholder:", redact("a@b.com", policy).value) # -> «redacted pii»
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # 4) Organisation-specific detectors via `custom_patterns`.
44
+ # Regexes are length-bounded and rejected at load if they look ReDoS-prone.
45
+ # ---------------------------------------------------------------------------
46
+ policy = Policy(custom_patterns=[
47
+ {"id": "custom.employee_id", "category": "pii", "regex": r"EMP-\d{6}"},
48
+ ])
49
+ print("custom:", redact("employee EMP-123456 logged in", policy).value)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # 5) Correlation hashing — replace a secret with a STABLE tag so you can join
54
+ # occurrences across traces WITHOUT storing the secret. Requires a key
55
+ # (fail-closed: omitting the key raises, so you never emit unkeyed hashes).
56
+ # ---------------------------------------------------------------------------
57
+ policy = Policy(
58
+ placeholder="[REDACTED:{category}:{hash}]",
59
+ hash_correlation=True,
60
+ hash_key="load-me-from-an-env-var", # keep this out of source in real code
61
+ )
62
+ r1 = redact("key sk-1234567890abcdefABCDEFGH", policy)
63
+ r2 = redact("again sk-1234567890abcdefABCDEFGH", policy)
64
+ print("hash#1:", r1.value)
65
+ print("hash#2:", r2.value)
66
+ # Same secret -> same tag in both, so you can correlate without the plaintext.
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # 6) Load a policy from a file (auto-discovered as ./traceredact.yml by the CLI).
71
+ # ---------------------------------------------------------------------------
72
+ # policy = Policy.load("traceredact.yml")
@@ -0,0 +1,59 @@
1
+ """03 · Structured tool-call arguments — the case field-name filters miss.
2
+
3
+ Agents pass nested JSON to tools, and secrets/PII end up deep inside under
4
+ arbitrary keys. traceredact walks the structure and redacts by *value*, then
5
+ tells you the exact `json_path` of every hit. It also traverses pydantic
6
+ models, dataclasses and attrs instances.
7
+
8
+ python examples/03_structured_args.py
9
+ """
10
+
11
+ from dataclasses import dataclass
12
+
13
+ from traceredact import redact
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # 1) A realistic tool-call payload. Note the secret sits under an innocuous
17
+ # key ("metadata.note") — a key-name denylist would sail right past it.
18
+ # ---------------------------------------------------------------------------
19
+ payload = {
20
+ "tool": "send_invoice",
21
+ "args": {
22
+ "to": "client@firm.co", # pii.email
23
+ "iban": "DE89 3704 0044 0532 0130 00", # pii.iban (mod-97 checked)
24
+ "items": [{"sku": "A1", "note": "ok"}], # untouched
25
+ },
26
+ "metadata": {"note": "their key is sk-1234567890abcdefABCDEFGH"}, # secret in prose
27
+ }
28
+
29
+ result = redact(payload)
30
+ for f in result.findings:
31
+ print(f"{f.json_path:<28} {f.detector_id}")
32
+ # args.to pii.email
33
+ # args.iban pii.iban
34
+ # metadata.note secrets.openai_key
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # 2) Sensitive KEY NAMES force-redact their value, even with no detectable
39
+ # signal in the value itself (e.g. a low-entropy password). This is the
40
+ # structured counterpart to scanning values.
41
+ # ---------------------------------------------------------------------------
42
+ result = redact({"password": "hunter2", "retries": 3})
43
+ print("\nby key-name:", result.value)
44
+ # -> {"password": "[REDACTED:secret]", "retries": 3} (3 is left alone)
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # 3) pydantic / dataclass / attrs instances are traversed automatically
49
+ # (redacted to a dict). Toggle with Policy(traverse_objects=False).
50
+ # ---------------------------------------------------------------------------
51
+ @dataclass
52
+ class ToolCall:
53
+ user_email: str
54
+ api_key: str
55
+
56
+
57
+ result = redact({"call": ToolCall("a@b.com", "ghp_abcdefghijklmnopqrstuvwxyz0123456789")})
58
+ print("dataclass:", result.value)
59
+ # -> {"call": {"user_email": "[REDACTED:pii]", "api_key": "[REDACTED:secret]"}}
@@ -0,0 +1,100 @@
1
+ """04 · Drop-in logging filter — redact every log record before it's emitted.
2
+
3
+ This is the highest-leverage integration: attach one `logging.Filter` and every
4
+ `logger.info(...)` across your app is redacted before it reaches any handler
5
+ (stdout, JSON logs, Datadog, files...). No call-site changes.
6
+
7
+ python examples/04_logging_filter.py
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ from typing import Any
14
+
15
+ from traceredact import Policy, redact
16
+
17
+
18
+ class RedactingFilter(logging.Filter):
19
+ """A logging.Filter that redacts a record's message, args and `extra` fields.
20
+
21
+ Key design points (these are the easy-to-get-wrong bits):
22
+
23
+ * We redact ``record.msg`` and ``record.args`` SEPARATELY, instead of calling
24
+ ``record.getMessage()``. This preserves logging's *deferred* %-formatting:
25
+ ``logger.info("token=%s", secret)`` keeps the secret in ``args`` until we
26
+ redact it, so it never gets formatted into the message in the clear.
27
+ * Redaction must NEVER raise — logging failures shouldn't crash your app — so
28
+ every redaction is wrapped and falls back to a marker.
29
+ * We deliberately skip ``exc_info``: redacting tracebacks is expensive and
30
+ fragile. If exception text can contain secrets, redact the rendered string
31
+ in a custom ``Formatter`` instead (see note at the bottom).
32
+ """
33
+
34
+ # LogRecord's own attributes — never treat these as user `extra=` data.
35
+ _RESERVED = frozenset(
36
+ "name msg args levelname levelno pathname filename module exc_info "
37
+ "exc_text stack_info lineno funcName created msecs relativeCreated "
38
+ "thread threadName processName process taskName".split()
39
+ )
40
+
41
+ def __init__(self, policy: Policy | None = None) -> None:
42
+ super().__init__()
43
+ self.policy = policy
44
+
45
+ def _redact(self, value: Any) -> Any:
46
+ try:
47
+ return redact(value, policy=self.policy).value
48
+ except Exception: # logging must never break the caller
49
+ return "[REDACTION_ERROR]"
50
+
51
+ def filter(self, record: logging.LogRecord) -> bool:
52
+ record.msg = self._redact(record.msg)
53
+
54
+ if isinstance(record.args, tuple):
55
+ record.args = tuple(self._redact(a) for a in record.args)
56
+ elif isinstance(record.args, dict): # %(name)s style
57
+ record.args = {k: self._redact(v) for k, v in record.args.items()}
58
+ elif record.args:
59
+ record.args = self._redact(record.args)
60
+
61
+ # Redact custom fields passed via logger.info(..., extra={...}).
62
+ for key, value in list(record.__dict__.items()):
63
+ if key in self._RESERVED or key.startswith("_"):
64
+ continue
65
+ record.__dict__[key] = self._redact(value)
66
+
67
+ return True # keep the record (we redacted it, we don't drop it)
68
+
69
+
70
+ if __name__ == "__main__":
71
+ import sys
72
+
73
+ # IMPORTANT: attach the filter to the HANDLER, not to a logger. A filter on a
74
+ # logger is only consulted for records logged *at that logger* — it is NOT
75
+ # applied to records propagated up from child loggers. A handler filter sees
76
+ # every record the handler emits, which is what you want for redaction.
77
+ handler = logging.StreamHandler(sys.stdout)
78
+ handler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
79
+ handler.addFilter(RedactingFilter())
80
+
81
+ root = logging.getLogger()
82
+ root.handlers.clear()
83
+ root.addHandler(handler)
84
+ root.setLevel(logging.INFO)
85
+
86
+ log = logging.getLogger("demo")
87
+
88
+ # Deferred %-formatting: the secret lives in args until the filter sees it.
89
+ log.info("calling %s with token=%s", "openai", "sk-1234567890abcdefABCDEFGH")
90
+ # dict args:
91
+ log.info("payload=%(p)s", {"p": {"email": "a@b.com", "ok": True}})
92
+ # structured extra= (avoid reserved LogRecord names like "args"/"msg"):
93
+ log.info("tool call", extra={"tool_args": {"card": "4111 1111 1111 1111"}})
94
+
95
+ # NOTE: an f-string formats BEFORE logging sees it, so the filter can't help:
96
+ # log.info(f"token={secret}") # <-- avoid; pass as an arg instead.
97
+ # For exceptions, redact the rendered text in a Formatter, e.g.:
98
+ # class RedactingFormatter(logging.Formatter):
99
+ # def format(self, record):
100
+ # return redact(super().format(record)).value
@@ -0,0 +1,53 @@
1
+ """05 · Streaming — redact token streams without buffering the whole response.
2
+
3
+ When you stream an LLM response, a secret like ``sk-...`` can be split across
4
+ several token deltas. Scanning each delta on its own would leak it at the seam.
5
+ ``redact_stream`` keeps a small carry-over window so cross-chunk secrets are
6
+ still caught.
7
+
8
+ python examples/05_streaming.py
9
+ """
10
+
11
+ import asyncio
12
+
13
+ from traceredact import StreamRedactor, redact_stream
14
+ from traceredact.streaming import redact_stream_async
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # 1) Sync: redact an iterable of text chunks. The secret below is deliberately
18
+ # split across two chunks to show the carry-over window working.
19
+ # ---------------------------------------------------------------------------
20
+ chunks = ["Here is the key sk-12345", "67890abcdefABCDEFGH — keep it safe"]
21
+ out = "".join(redact_stream(chunks))
22
+ print("streamed:", out)
23
+ assert "sk-1234567890abcdefABCDEFGH" not in out # caught across the boundary
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # 2) Manual control with StreamRedactor: feed deltas, then flush at the end.
28
+ # Use ONE redactor per stream; never share carry between conversations.
29
+ # ---------------------------------------------------------------------------
30
+ r = StreamRedactor()
31
+ emitted = []
32
+ for delta in ["my email is al", "ice@acme.io done"]:
33
+ emitted.append(r.feed(delta)) # returns redacted text safe to emit now
34
+ emitted.append(r.flush()) # flush the remaining carry at end-of-stream
35
+ print("manual :", "".join(emitted))
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # 3) Async (e.g. an OpenAI async token stream — see 07_openai.py to plug it in).
40
+ # ---------------------------------------------------------------------------
41
+ async def main() -> None:
42
+ async def token_source():
43
+ for piece in ["secret ghp_abcdefghij", "klmnopqrstuvwxyz0123456789!"]:
44
+ yield piece
45
+
46
+ parts = [p async for p in redact_stream_async(token_source())]
47
+ print("async :", "".join(parts))
48
+
49
+
50
+ asyncio.run(main())
51
+
52
+ # Tip: the carry-over window (default 512) must be >= the longest secret AND its
53
+ # context. For huge PEM blocks streamed token-by-token, prefer one-shot redact().
@@ -0,0 +1,47 @@
1
+ """06 · Redact agent traces before persisting them (DB / Langfuse / Datadog).
2
+
3
+ The privacy-by-design pattern: redact in-process *before* the trace leaves you,
4
+ so the sensitive data never lands in your store or observability vendor.
5
+
6
+ python examples/06_before_db_or_logger.py
7
+ """
8
+
9
+ from traceredact import redact
10
+
11
+ # A span you're about to insert into Postgres / ship to Langfuse / log as JSON.
12
+ span = {
13
+ "trace_id": "t_8842",
14
+ "input": {"messages": [{"role": "user", "content": "charge card 4111111111111111"}]},
15
+ "tool_calls": [
16
+ {"name": "create_payment",
17
+ "args": {"card": "4111111111111111", "email": "a@b.com"}},
18
+ ],
19
+ "retrieved_context": ["doc#1: contact bob@corp.io for the AWS key AKIAIOSFODNN7EXAMPLE"],
20
+ "output": "Done. A receipt was sent.",
21
+ }
22
+
23
+
24
+ def store(_span: dict) -> None:
25
+ """Stand-in for your real sink: db.insert(...) / langfuse.trace(...) / log.info(...)."""
26
+
27
+
28
+ # --- the one line that matters -------------------------------------------------
29
+ result = redact(span)
30
+ store(result.value) # only the redacted copy is ever persisted
31
+ # ------------------------------------------------------------------------------
32
+
33
+ print(f"persisted with {len(result.findings)} value(s) redacted:")
34
+ for f in result.findings:
35
+ print(f" {f.json_path} ({f.detector_id})")
36
+
37
+ # Optional: emit a privacy metric / alert when secrets show up in traces.
38
+ if any(f.category == "secret" for f in result.findings):
39
+ print("\n⚠️ secret detected in a trace — investigate the source.")
40
+
41
+ # Belt-and-braces check you can assert in tests: no original survived.
42
+ import json # noqa: E402
43
+
44
+ blob = json.dumps(result.value)
45
+ for leaked in ("4111111111111111", "a@b.com", "AKIAIOSFODNN7EXAMPLE", "bob@corp.io"):
46
+ assert leaked not in blob
47
+ print("\nzero originals present in the stored payload ✓")
@@ -0,0 +1,43 @@
1
+ """07 · OpenAI wrapper — redact prompts going out and content coming back.
2
+
3
+ `wrap_openai(client)` patches `chat.completions.create` so:
4
+ * outbound `messages` are redacted before the request leaves your process, and
5
+ * the returned assistant `content` is redacted before it reaches your logs.
6
+
7
+ Requires the OpenAI SDK: pip install "traceredact[openai]"
8
+
9
+ This file is illustrative — it needs real credentials to actually call the API.
10
+ """
11
+
12
+ from traceredact import Policy
13
+ from traceredact.integrations.openai import wrap_openai
14
+
15
+ # --- sync ---------------------------------------------------------------------
16
+ # from openai import OpenAI
17
+ # client = wrap_openai(OpenAI(), policy=Policy()) # policy is optional
18
+ #
19
+ # resp = client.chat.completions.create(
20
+ # model="gpt-4o-mini",
21
+ # messages=[{"role": "user", "content": "my email is a@b.com, summarise it"}],
22
+ # )
23
+ # The model receives the REDACTED prompt, and resp.choices[0].message.content
24
+ # is redacted before you log it.
25
+
26
+ # --- async --------------------------------------------------------------------
27
+ # from openai import AsyncOpenAI
28
+ # from traceredact.integrations.openai import wrap_async_openai
29
+ # client = wrap_async_openai(AsyncOpenAI())
30
+ # resp = await client.chat.completions.create(model="gpt-4o-mini", messages=[...])
31
+
32
+ # --- streaming ----------------------------------------------------------------
33
+ # For stream=True, wrap the returned async stream to get redacted text pieces
34
+ # (carry-over across token deltas):
35
+ #
36
+ # from traceredact.integrations.openai import redact_content_stream
37
+ # stream = await AsyncOpenAI().chat.completions.create(model="gpt-4o-mini",
38
+ # messages=[...], stream=True)
39
+ # async for safe_text in redact_content_stream(stream):
40
+ # log.info(safe_text) # only redacted text is ever logged
41
+
42
+ _ = (wrap_openai, Policy) # keep the imports referenced for a clean `python -c`
43
+ print("See the comments — this example needs the OpenAI SDK + credentials to run.")
@@ -0,0 +1,28 @@
1
+ """08 · LangChain — redact prompts flowing through the callback system.
2
+
3
+ Attach `RedactingCallbackHandler` and the prompts your chains/agents send are
4
+ redacted, with findings collected for inspection. Degrades gracefully if
5
+ `langchain-core` isn't installed (so importing never hard-fails).
6
+
7
+ Requires: pip install "traceredact[langchain]"
8
+ """
9
+
10
+ from traceredact.integrations.langchain import RedactingCallbackHandler
11
+
12
+ handler = RedactingCallbackHandler()
13
+
14
+ # Pass it to any LangChain call:
15
+ # llm.invoke("...", config={"callbacks": [handler]})
16
+ # chain.invoke({...}, config={"callbacks": [handler]})
17
+ #
18
+ # After a run, the redacted prompts and findings are available on the handler.
19
+
20
+ # --- standalone demo (no LangChain needed) ------------------------------------
21
+ handler.on_llm_start({}, ["my OpenAI key is sk-1234567890abcdefABCDEFGH"])
22
+ print("redacted prompt :", handler.redacted_prompts[0])
23
+ print("findings :", [f.detector_id for f in handler.findings])
24
+ # redacted prompt : my OpenAI key is [REDACTED:secret]
25
+ # findings : ['secrets.openai_key']
26
+
27
+ # You can also redact arbitrary text yourself via the handler:
28
+ print("ad-hoc :", handler.redact_text("ping a@b.com"))
@@ -0,0 +1,102 @@
1
+ """09 · FastAPI / ASGI middleware — log redacted request & response bodies.
2
+
3
+ This middleware logs a REDACTED COPY of JSON request/response bodies. It does
4
+ NOT modify what the app receives or what the client gets back — it only sanitises
5
+ what you log. Pure-ASGI so it works with FastAPI, Starlette, etc.
6
+
7
+ Requires Starlette (bundled with FastAPI): pip install "traceredact" starlette
8
+
9
+ Pitfalls handled below:
10
+ * only JSON content-types are parsed (multipart/files/SSE/protobuf are skipped; a gzip-encoded JSON body fails to parse and is logged as `_unparsed`)
11
+ * bodies are bounded (`max_body_bytes`) so a huge upload can't blow memory
12
+ * original ASGI messages are forwarded UNCHANGED — never break the response
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import logging
19
+ from typing import Any
20
+
21
+ from traceredact import Policy, redact
22
+
23
+ # These types come from Starlette; the import is guarded so the file is importable
24
+ # even without it installed (the class is illustrative).
25
+ try:
26
+ from starlette.types import ASGIApp, Message, Receive, Scope, Send
27
+ except Exception: # pragma: no cover
28
+ ASGIApp = Message = Receive = Scope = Send = Any # type: ignore
29
+
30
+
31
+ class RedactedBodyLoggingMiddleware:
32
+ def __init__(self, app: ASGIApp, logger: logging.Logger,
33
+ policy: Policy | None = None, max_body_bytes: int = 1_000_000) -> None:
34
+ self.app = app
35
+ self.logger = logger
36
+ self.policy = policy
37
+ self.max_body_bytes = max_body_bytes
38
+
39
+ def _is_json(self, headers: list[tuple[bytes, bytes]]) -> bool:
40
+ for k, v in headers:
41
+ if k.lower() == b"content-type":
42
+ return b"application/json" in v.lower() or b"+json" in v.lower()
43
+ return False
44
+
45
+ def _redacted_json(self, body: bytes) -> Any:
46
+ if not body or len(body) > self.max_body_bytes:
47
+ return {"_skipped": True, "bytes": len(body)}
48
+ try:
49
+ parsed = json.loads(body)
50
+ except (ValueError, UnicodeDecodeError):
51
+ return {"_unparsed": True}
52
+ return redact(parsed, policy=self.policy).value # redact the parsed body
53
+
54
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
55
+ if scope["type"] != "http":
56
+ await self.app(scope, receive, send) # pass through websockets etc.
57
+ return
58
+
59
+ req_chunks: list[bytes] = []
60
+ res_chunks: list[bytes] = []
61
+ res_status = 0
62
+ res_headers: list[tuple[bytes, bytes]] = []
63
+
64
+ async def wrapped_receive() -> Message:
65
+ message = await receive()
66
+ if message["type"] == "http.request":
67
+ if sum(map(len, req_chunks)) <= self.max_body_bytes:
68
+ req_chunks.append(message.get("body", b""))
69
+ return message # forward the ORIGINAL message to the app
70
+
71
+ async def wrapped_send(message: Message) -> None:
72
+ nonlocal res_status, res_headers
73
+ if message["type"] == "http.response.start":
74
+ res_status = message["status"]
75
+ res_headers = list(message.get("headers", []))
76
+ elif message["type"] == "http.response.body":
77
+ if sum(map(len, res_chunks)) <= self.max_body_bytes:
78
+ res_chunks.append(message.get("body", b""))
79
+ if not message.get("more_body", False):
80
+ self.logger.info("http_exchange", extra={
81
+ "method": scope.get("method"),
82
+ "path": scope.get("path"),
83
+ "status": res_status,
84
+ "request_body": self._redacted_json(b"".join(req_chunks))
85
+ if self._is_json(list(scope.get("headers", []))) else None,
86
+ "response_body": self._redacted_json(b"".join(res_chunks))
87
+ if self._is_json(res_headers) else None,
88
+ })
89
+ await send(message) # forward the ORIGINAL message to the client
90
+
91
+ await self.app(scope, wrapped_receive, wrapped_send)
92
+
93
+
94
+ # Usage:
95
+ # app.add_middleware(RedactedBodyLoggingMiddleware,
96
+ # logger=logging.getLogger("http"), policy=Policy())
97
+ #
98
+ # For true token-by-token streaming responses, log via redact_stream/
99
+ # redact_content_stream instead (this middleware logs after the body completes).
100
+
101
+ if __name__ == "__main__":
102
+ print("Illustrative ASGI middleware — wire it into a FastAPI/Starlette app.")
@@ -0,0 +1,51 @@
1
+ # 10 · CI gate — fail the build if secrets/PII appear
2
+
3
+ `traceredact scan` exits **non-zero** when it finds anything, so you can gate a
4
+ job on it. Point it at fixtures, prompt files, exported traces, log dumps — any
5
+ file or directory.
6
+
7
+ ## Locally
8
+
9
+ ```bash
10
+ traceredact scan ./tests/fixtures/ # pretty table; exit 1 if findings
11
+ traceredact scan trace.json --format json # machine-readable for tooling
12
+ traceredact scan . --policy traceredact.yml
13
+ ```
14
+
15
+ ## GitHub Actions
16
+
17
+ ```yaml
18
+ name: redaction-check
19
+ on: [pull_request]
20
+ jobs:
21
+ scan:
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ - run: pipx install traceredact
26
+ # Fails the PR if any secret/PII is committed under these paths.
27
+ - run: traceredact scan ./fixtures ./examples --format json
28
+ ```
29
+
30
+ ## pre-commit hook
31
+
32
+ ```yaml
33
+ # .pre-commit-config.yaml
34
+ repos:
35
+ - repo: local
36
+ hooks:
37
+ - id: traceredact
38
+ name: traceredact scan
39
+ entry: traceredact scan
40
+ language: system
41
+ pass_filenames: true
42
+ ```
43
+
44
+ ## Redact a file to a sanitised copy (CI artifact sanitisation)
45
+
46
+ ```bash
47
+ traceredact redact prod-trace.json --output prod-trace.redacted.json
48
+ ```
49
+
50
+ Use `traceredact.yml` to tune detectors/allowlist so the gate matches your repo
51
+ (see `../traceredact.yml` for a fully-commented policy).
@@ -0,0 +1,25 @@
1
+ # Examples
2
+
3
+ Runnable, heavily-commented examples for common scenarios. Install first:
4
+
5
+ ```bash
6
+ pip install traceredact # core
7
+ pip install "traceredact[openai]" "traceredact[langchain]" # for the SDK examples
8
+ ```
9
+
10
+ Run any pure-Python example directly, e.g. `python examples/01_basics.py`.
11
+
12
+ | # | File | Scenario |
13
+ |---|------|----------|
14
+ | 01 | [`01_basics.py`](01_basics.py) | Redact a string & a nested payload; inspect findings |
15
+ | 02 | [`02_policy.py`](02_policy.py) | Tune detectors, allowlist, custom patterns, placeholder, correlation hashing |
16
+ | 03 | [`03_structured_args.py`](03_structured_args.py) | Tool-call args by JSON path; sensitive keys; pydantic/dataclass |
17
+ | 04 | [`04_logging_filter.py`](04_logging_filter.py) | **Drop-in `logging.Filter`** — redact every log record, no call-site changes |
18
+ | 05 | [`05_streaming.py`](05_streaming.py) | Redact streamed token deltas (secrets that span chunks) |
19
+ | 06 | [`06_before_db_or_logger.py`](06_before_db_or_logger.py) | Redact agent traces before DB / Langfuse / Datadog |
20
+ | 07 | [`07_openai.py`](07_openai.py) | OpenAI wrapper (sync / async / streaming) |
21
+ | 08 | [`08_langchain.py`](08_langchain.py) | LangChain `RedactingCallbackHandler` |
22
+ | 09 | [`09_fastapi_middleware.py`](09_fastapi_middleware.py) | ASGI/FastAPI middleware: log redacted request/response bodies |
23
+ | 10 | [`10_ci_gate.md`](10_ci_gate.md) | `traceredact scan` as a CI gate / pre-commit hook |
24
+
25
+ Pure-Python (no SDK needed): 01–06. Illustrative (need an SDK/framework): 07–09. Markdown guide (nothing to run): 10.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "traceredact"
3
- version = "0.2.2"
3
+ version = "0.2.3"
4
4
  description = "Redact PII and secrets from AI prompts, traces and tool-call arguments before they reach your loggers."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes