tracely-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracely/redaction.py ADDED
@@ -0,0 +1,196 @@
1
+ """Smart data redaction for the TRACELY SDK (FR8, FR11, NFR11).
2
+
3
+ Provides field-name based redaction for JSON bodies, header value
4
+ redaction for sensitive headers, and pattern-based redaction for
5
+ credit card numbers, email addresses, and SSNs.
6
+
7
+ All public functions are fail-silent — exceptions are caught and logged
8
+ at DEBUG level to satisfy the SDK's zero-crash guarantee.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import re
16
+ from typing import Any
17
+
18
+ logger = logging.getLogger("tracely")
19
+
20
# Field names whose values are always masked in JSON bodies (AC1).
# Incoming keys are lowercased before the lookup, so entries must be lowercase.
SENSITIVE_FIELDS: frozenset[str] = frozenset((
    "password",
    "secret",
    "token",
    "authorization",
    "api_key",
    "credit_card",
    "ssn",
))

# Header names whose values are always masked (AC2).
# Incoming header names are lowercased before the lookup.
SENSITIVE_HEADERS: frozenset[str] = frozenset((
    "authorization",
    "cookie",
    "set-cookie",
    "x-api-key",
))

# Replacement text used by field-name and header redaction.
REDACTED: str = "[REDACTED]"

# Pattern-based redaction (AC3). Each regex is compiled once at import time.
# Credit card: four groups of four digits, each group optionally followed by
# a single dash or whitespace character.
_CC_PATTERN: re.Pattern[str] = re.compile(
    r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b"
)
# SSN: strictly the dashed XXX-XX-XXXX layout.
_SSN_PATTERN: re.Pattern[str] = re.compile(
    r"\b\d{3}-\d{2}-\d{4}\b"
)
# Email: local part, "@", domain, and an alphabetic TLD of two or more letters.
_EMAIL_PATTERN: re.Pattern[str] = re.compile(
    r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
)


# Custom field names supplied at runtime; replaced wholesale by
# configure_redaction() and read back through get_extra_fields().
_extra_fields: frozenset[str] = frozenset()
58
+
59
+
60
def configure_redaction(
    *,
    extra_fields: frozenset[str] = frozenset(),
) -> None:
    """Configure module-level redaction settings.

    Called by SDK init to propagate ``TRACELY_REDACT_FIELDS`` config.

    Names are stored lowercased. Field matching lowercases the incoming key
    before the lookup, so a mixed-case entry such as ``"SessionId"`` stored
    verbatim would never match anything.

    Args:
        extra_fields: Additional field names to redact (additive to defaults).
            Any iterable of strings is accepted. ``None`` or an empty
            collection clears the custom fields. Non-string entries are
            ignored rather than raising, so SDK initialization cannot fail
            here.
    """
    global _extra_fields
    _extra_fields = frozenset(
        name.lower()
        for name in (extra_fields or ())
        if isinstance(name, str)
    )
73
+
74
+
75
def get_extra_fields() -> frozenset[str]:
    """Expose the custom redaction field names currently held by this module.

    Returns:
        The frozenset most recently stored in the module-level
        ``_extra_fields`` variable.
    """
    return _extra_fields
78
+
79
+
80
def _redact_value(obj: Any, sensitive: frozenset[str]) -> Any:
    """Return a copy of a parsed JSON structure with sensitive values masked.

    Dicts and lists are walked recursively and rebuilt; every other value is
    returned untouched. A dict key is sensitive when its lowercased form is
    in ``sensitive``. The key itself is kept, and its whole value (including
    any nested structure) is replaced by ``[REDACTED]``.

    Args:
        obj: Any value produced by JSON parsing.
        sensitive: Lowercase field names whose values must be masked.

    Returns:
        The redacted structure; the input is not mutated.
    """
    if isinstance(obj, list):
        return [_redact_value(element, sensitive) for element in obj]
    if not isinstance(obj, dict):
        return obj

    cleaned = {}
    for name, inner in obj.items():
        if name.lower() in sensitive:
            cleaned[name] = REDACTED
        else:
            cleaned[name] = _redact_value(inner, sensitive)
    return cleaned
94
+
95
+
96
def redact_body(
    body: str,
    *,
    extra_fields: frozenset[str] | None = None,
) -> str:
    """Redact sensitive field values in a JSON body string.

    This function never raises. Input that is not JSON is returned as-is.
    If parsing or redaction fails unexpectedly, the failure is logged at
    DEBUG level and the original body is returned.

    Args:
        body: The body string (may or may not be valid JSON).
        extra_fields: Additional field names to redact (merged with defaults).
            Names are lowercased before use, so matching stays
            case-insensitive for custom fields too.

    Returns:
        The body with sensitive field values replaced by ``[REDACTED]``,
        or the original body unchanged if it is not valid JSON, contains
        nothing to redact, or could not be processed.
    """
    if not body:
        return body

    try:
        parsed = json.loads(body)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass: the body simply is
        # not JSON. This is the expected case, so it is not logged.
        return body
    except (RecursionError, TypeError):
        # Deeply nested documents exhaust the decoder's recursion budget and
        # non-text input is rejected with TypeError. Neither may escape.
        logger.debug("Error during body redaction", exc_info=True)
        return body

    try:
        sensitive = SENSITIVE_FIELDS
        if extra_fields:
            # Keys are lowercased before the lookup, so the custom names must
            # be lowercase as well or they would never match.
            sensitive = SENSITIVE_FIELDS | {name.lower() for name in extra_fields}

        redacted = _redact_value(parsed, sensitive)
        # Only re-serialize if redaction actually changed something;
        # otherwise preserve original body formatting exactly.
        if redacted == parsed:
            return body
        return json.dumps(redacted)
    except Exception:
        logger.debug("Error during body redaction", exc_info=True)
        return body
133
+
134
+
135
def redact_headers(
    headers: dict[str, str] | list[tuple[bytes, bytes]] | None,
) -> dict[str, str]:
    """Redact sensitive header values.

    Header names are matched case-insensitively against
    ``SENSITIVE_HEADERS``. Byte-string names and values are decoded as
    UTF-8 (with replacement of undecodable bytes) in both input shapes.
    Without that, a bytes-keyed mapping would never match the
    string-based sensitive set and its credentials would pass through
    unredacted.

    Args:
        headers: Headers as a dict (or any object exposing ``items()``),
            ASGI-style byte tuples, or None.

    Returns:
        A new dict with sensitive header values replaced by ``[REDACTED]``.
        Non-sensitive headers are preserved as-is. An empty dict is
        returned for ``None`` input or if processing fails, so a failure
        never leaks an unredacted value.
    """
    if headers is None:
        return {}

    try:
        items = getattr(headers, "items", None)
        if callable(items):
            # Mapping input. Duck-typed so dict subclasses and other
            # mapping types are handled here instead of being unpacked
            # as (name, value) pairs below.
            result: dict[str, str] = {}
            for name, value in items():
                key = (
                    name.decode("utf-8", errors="replace")
                    if isinstance(name, bytes)
                    else name
                )
                if key.lower() in SENSITIVE_HEADERS:
                    result[key] = REDACTED
                elif isinstance(value, bytes):
                    result[key] = value.decode("utf-8", errors="replace")
                else:
                    result[key] = value
            return result

        # ASGI-style list of (name_bytes, value_bytes) tuples
        result = {}
        for name, value in headers:
            key = (
                name.decode("utf-8", errors="replace")
                if isinstance(name, bytes)
                else str(name)
            )
            val = (
                value.decode("utf-8", errors="replace")
                if isinstance(value, bytes)
                else str(value)
            )
            result[key] = REDACTED if key.lower() in SENSITIVE_HEADERS else val
        return result
    except Exception:
        logger.debug("Error during header redaction", exc_info=True)
        return {}
169
+
170
+
171
def redact_patterns(text: str) -> str:
    """Mask SSNs, credit card numbers, and email addresses found in text.

    Every match is swapped for a typed placeholder:
    ``[REDACTED:ssn]``, ``[REDACTED:credit_card]``, or ``[REDACTED:email]``.

    Args:
        text: The text string to scan.

    Returns:
        The text with all matches replaced. Falsy input is returned
        unchanged, and so is the original text if any substitution raises
        (the error is logged at DEBUG level).
    """
    if not text:
        return text

    try:
        # Order matters: SSN runs before CC so that an SSN is never partially
        # consumed as a CC sub-pattern. SSN is more specific (XXX-XX-XXXX).
        substitutions = (
            (_SSN_PATTERN, "[REDACTED:ssn]"),
            (_CC_PATTERN, "[REDACTED:credit_card]"),
            (_EMAIL_PATTERN, "[REDACTED:email]"),
        )
        scrubbed = text
        for pattern, placeholder in substitutions:
            scrubbed = pattern.sub(placeholder, scrubbed)
        return scrubbed
    except Exception:
        logger.debug("Error during pattern redaction", exc_info=True)
        return text
tracely/sdk.py ADDED
@@ -0,0 +1,192 @@
1
+ """Core SDK initialization and lifecycle."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from tracely.config import TracelyConfig
8
+ from tracely.detection import FrameworkInfo, detect_framework
9
+ from tracely.exporter import BatchSpanExporter
10
+ from tracely.instrumentation import get_instrumentor
11
+ from tracely.instrumentation.base import BaseInstrumentor
12
+ from tracely.redaction import configure_redaction
13
+ from tracely.span_processor import SpanProcessor, set_processor
14
+ from tracely.transport import HttpTransport, SpanBuffer
15
+
16
+ logger = logging.getLogger("tracely")
17
+
18
# The process-wide SDK object: assigned by init(), cleared by shutdown() and
# _reset(). None means no SDK instance currently exists.
_instance: TracelySdk | None = None
19
+
20
+
21
class TracelySdk:
    """Holder for the SDK's configuration and live pipeline components.

    The module keeps at most one instance of this class. The constructor
    only stores what it is given; building and starting the components is
    the caller's job.

    Args:
        config: Resolved SDK configuration; ``config.enabled`` seeds
            ``self.enabled``.
        framework_info: Detected framework, if any.
        instrumentor: Instrumentor to deactivate on shutdown, if any.
        buffer: Span buffer of the export pipeline, if one was built.
        transport: HTTP transport of the export pipeline, if one was built.
        processor: Span processor of the export pipeline, if one was built.
        exporter: Batch exporter to stop on shutdown, if one was built.
    """

    def __init__(
        self,
        config: TracelyConfig,
        framework_info: FrameworkInfo | None = None,
        instrumentor: BaseInstrumentor | None = None,
        buffer: SpanBuffer | None = None,
        transport: HttpTransport | None = None,
        processor: SpanProcessor | None = None,
        exporter: BatchSpanExporter | None = None,
    ) -> None:
        self.config = config
        self.enabled = config.enabled

        # Environment detection and instrumentation
        self.framework_info = framework_info
        self.instrumentor = instrumentor

        # Export pipeline pieces
        self.buffer = buffer
        self.transport = transport
        self.processor = processor
        self.exporter = exporter

    def shutdown(self) -> None:
        """Stop the exporter, deactivate instrumentation, and disable the SDK.

        Each teardown step is isolated: a failure is logged at DEBUG level
        and the remaining steps still run. The global span processor is
        then cleared and ``enabled`` is set to False.
        """
        # Exporter goes first, then instrumentation.
        teardown_steps = (
            ("exporter", "stop", "Error stopping exporter"),
            ("instrumentor", "deactivate", "Error deactivating instrumentor"),
        )
        for attr_name, method_name, failure_msg in teardown_steps:
            component = getattr(self, attr_name)
            if component is None:
                continue
            try:
                getattr(component, method_name)()
            except Exception:
                logger.debug(failure_msg, exc_info=True)

        # Detach the global processor.
        set_processor(None)

        self.enabled = False
63
+
64
+
65
def init(
    *,
    api_key: str | None = None,
    environment: str | None = None,
    endpoint: str | None = None,
    service_name: str | None = None,
    service_version: str | None = None,
) -> None:
    """Initialize the TRACELY SDK.

    Configuration is loaded from the environment first; every argument
    that is not None then overrides the matching config attribute. The
    call is a no-op when an SDK instance already exists.

    When the SDK is enabled and an API key is present, the export pipeline
    is assembled and started:
    SpanBuffer → SpanProcessor → BatchSpanExporter → HttpTransport → OTLP/HTTP

    Args:
        api_key: Override TRACELY_API_KEY env var.
        environment: Override ENVIRONMENT env var.
        endpoint: Override TRACELY_ENDPOINT env var.
        service_name: Label for this service (e.g., "api", "celery-worker").
        service_version: Version string for this service.
    """
    global _instance

    # An instance already exists: leave it untouched.
    if _instance is not None:
        return

    config = TracelyConfig.from_env()

    # Explicit arguments win over environment-derived values.
    overrides = {
        "api_key": api_key,
        "environment": environment,
        "endpoint": endpoint,
        "service_name": service_name,
        "service_version": service_version,
    }
    for option, override in overrides.items():
        if override is not None:
            setattr(config, option, override)

    # Hand the custom redaction field names to the redaction module (FR8, FR11).
    configure_redaction(extra_fields=config.redact_fields)

    if not config.enabled:
        logger.warning(
            "TRACELY_API_KEY not set. SDK is disabled — "
            "no telemetry will be collected or sent."
        )

    # Framework detection runs even when disabled (for diagnostics).
    framework_info = detect_framework()

    # Export pipeline components; they stay None unless the SDK is enabled.
    buffer: SpanBuffer | None = None
    transport: HttpTransport | None = None
    processor: SpanProcessor | None = None
    exporter: BatchSpanExporter | None = None

    if config.enabled and config.api_key:
        buffer = SpanBuffer()
        transport = HttpTransport(
            endpoint=config.endpoint,
            api_key=config.api_key,
        )
        exporter = BatchSpanExporter(buffer=buffer, transport=transport)
        processor = SpanProcessor(buffer=buffer, on_buffer_ready=exporter.notify)

        # Make the processor reachable by middleware.
        set_processor(processor)

        # Kick off the exporter; a start failure is logged, not raised.
        try:
            exporter.start()
        except Exception:
            logger.debug("Error starting batch exporter", exc_info=True)

    # Instrumentation is attempted only when enabled and a framework was found.
    # An instrumentor whose activation fails is discarded.
    instrumentor: BaseInstrumentor | None = None
    if config.enabled and framework_info is not None:
        candidate = get_instrumentor(framework_info)
        if candidate is not None:
            try:
                candidate.activate()
            except Exception:
                logger.debug("Error activating instrumentor", exc_info=True)
            else:
                instrumentor = candidate

    if framework_info is None:
        logger.info(
            "TRACELY: No supported framework detected. "
            "Use manual instrumentation for custom setups."
        )
    else:
        logger.info("TRACELY: Detected framework: %s", framework_info.name)

    _instance = TracelySdk(
        config=config,
        framework_info=framework_info,
        instrumentor=instrumentor,
        buffer=buffer,
        transport=transport,
        processor=processor,
        exporter=exporter,
    )
170
+
171
+
172
def shutdown() -> None:
    """Shut down the current SDK instance, if any, and forget it.

    Delegates to ``TracelySdk.shutdown()`` and then clears the module-level
    instance. Does nothing when no instance exists.
    """
    global _instance

    current = _instance
    if current is None:
        return

    current.shutdown()
    _instance = None
179
+
180
+
181
def _sdk_instance() -> TracelySdk | None:
    """Expose the module-level SDK instance (testing aid).

    Returns:
        The current ``TracelySdk`` object, or None when none is set.
    """
    return _instance
184
+
185
+
186
def _reset() -> None:
    """Return the module to its pre-init state (testing aid only).

    Shuts down the current instance if one exists, clears the module-level
    instance, and clears the global span processor.
    """
    global _instance

    active = _instance
    if active is not None:
        active.shutdown()

    _instance = None
    set_processor(None)
tracely/span.py ADDED
@@ -0,0 +1,168 @@
1
+ """Span model and trace ID generation for structured tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import time
8
+ from typing import Any, Callable
9
+
10
+ logger = logging.getLogger("tracely")
11
+
12
+
13
def generate_trace_id() -> str:
    """Produce a fresh trace ID.

    Returns:
        16 bytes from ``os.urandom`` rendered as 32 lowercase hex characters.
    """
    raw = os.urandom(16)
    return raw.hex()
16
+
17
+
18
def generate_span_id() -> str:
    """Produce a fresh span ID.

    Returns:
        8 bytes from ``os.urandom`` rendered as 16 lowercase hex characters.
    """
    raw = os.urandom(8)
    return raw.hex()
21
+
22
+
23
class Span:
    """One timed operation inside a distributed trace.

    A span created without a parent is a root: it gets a new trace ID and
    no parent span ID. A span created with a parent reuses the parent's
    trace ID and records the parent's span ID. Once ``end()`` has run, the
    mutating methods become no-ops.

    Args:
        name: Human-readable operation name (e.g., "GET /api/users").
        parent: Optional parent span — child inherits trace_id.
        span_type: "span" (default) or "pending_span" for AR3 pattern.
        kind: Span kind: INTERNAL, SERVER, CLIENT, PRODUCER, CONSUMER.
        service_name: Name of the service producing this span.
        on_end: Callback invoked when span ends, receiving the span.
    """

    __slots__ = (
        "trace_id",
        "span_id",
        "parent_span_id",
        "name",
        "span_type",
        "kind",
        "service_name",
        "start_time",
        "end_time",
        "duration_ms",
        "status_code",
        "status_message",
        "attributes",
        "events",
        "_on_end",
        "_ended",
    )

    def __init__(
        self,
        name: str,
        *,
        parent: Span | None = None,
        span_type: str = "span",
        kind: str = "INTERNAL",
        service_name: str | None = None,
        on_end: Callable[[Span], None] | None = None,
    ) -> None:
        is_root = parent is None
        self.trace_id = generate_trace_id() if is_root else parent.trace_id
        self.parent_span_id: str | None = None if is_root else parent.span_id
        self.span_id = generate_span_id()

        # Descriptive fields supplied by the caller.
        self.name = name
        self.span_type = span_type
        self.kind = kind
        self.service_name = service_name

        # Timing: the start is stamped now; the rest is filled in by end().
        self.start_time: float = time.time()
        self.end_time: float | None = None
        self.duration_ms: float | None = None

        # Outcome and payload.
        self.status_code: str = "UNSET"
        self.status_message: str = ""
        self.attributes: dict[str, str] = {}
        self.events: list[dict[str, Any]] = []

        self._on_end = on_end
        self._ended = False

    def set_attribute(self, key: str, value: Any) -> None:
        """Store ``str(value)`` under ``key``; ignored once the span ended."""
        if not self._ended:
            self.attributes[key] = str(value)

    def add_event(
        self,
        message: str,
        *,
        level: str = "INFO",
        attributes: dict[str, str] | None = None,
    ) -> None:
        """Append a timestamped log event; ignored once the span ended.

        Args:
            message: Human-readable event message.
            level: Log level (DEBUG, INFO, WARNING, ERROR).
            attributes: Optional key-value metadata for this event. It is
                copied, so later changes by the caller do not affect the
                recorded event.
        """
        if self._ended:
            return
        record = {
            "timestamp": time.time(),
            "level": level,
            "message": message,
            "attributes": dict(attributes) if attributes else {},
        }
        self.events.append(record)

    def set_status(self, code: str, message: str = "") -> None:
        """Record the status code and message; ignored once the span ended."""
        if not self._ended:
            self.status_code = code
            self.status_message = message

    def end(self) -> None:
        """Close the span, compute its duration, and fire the on_end callback.

        Only the first call has any effect. A callback that raises is
        logged at DEBUG level and otherwise ignored.
        """
        if self._ended:
            return
        self._ended = True

        finished_at = time.time()
        self.end_time = finished_at
        self.duration_ms = (finished_at - self.start_time) * 1000

        callback = self._on_end
        if callback is None:
            return
        try:
            callback(self)
        except Exception:
            logger.debug("Error in span on_end callback", exc_info=True)

    def to_dict(self) -> dict[str, Any]:
        """Build a transport-ready dict snapshot of the span.

        ``attributes`` and ``events`` are shallow copies, so adding to the
        span afterwards does not grow an already-produced snapshot.
        """
        recorded_events = list(self.events)
        payload: dict[str, Any] = {
            "trace_id": self.trace_id,
            "span_id": self.span_id,
            "parent_span_id": self.parent_span_id,
            "span_name": self.name,
            "span_type": self.span_type,
            "kind": self.kind,
            "service_name": self.service_name,
            "start_time": self.start_time,
            "end_time": self.end_time,
            "duration_ms": self.duration_ms,
            "status_code": self.status_code,
            "status_message": self.status_message,
            "attributes": dict(self.attributes),
            "events": recorded_events,
            "event_count": len(self.events),
        }
        return payload
@@ -0,0 +1,110 @@
1
+ """Span processor for pending span pattern (AR3).
2
+
3
+ Exports spans to SpanBuffer on both start (pending_span) and end (span),
4
+ enabling real-time dashboard updates for in-progress requests.
5
+
6
+ Also provides a global processor registry so middleware and the tracing API
7
+ can call on_start/on_end without direct references to the processor instance.
8
+
9
+ All operations are fail-silent — never crashes the host application.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from typing import TYPE_CHECKING, Callable
16
+
17
+ if TYPE_CHECKING:
18
+ from tracely.span import Span
19
+ from tracely.transport import SpanBuffer
20
+
21
+ logger = logging.getLogger("tracely")
22
+
23
# Module-wide span processor. Written only through set_processor(); None
# means no processor is registered and span notifications are dropped.
_processor: SpanProcessor | None = None
25
+
26
+
27
def set_processor(processor: SpanProcessor | None) -> None:
    """Install ``processor`` as the module-wide span processor.

    Args:
        processor: The processor to register, or None to unregister the
            current one.
    """
    global _processor
    _processor = processor
31
+
32
+
33
def get_processor() -> SpanProcessor | None:
    """Return the module-wide span processor.

    Returns:
        The registered ``SpanProcessor``, or None when none is registered.
    """
    return _processor
36
+
37
+
38
def on_span_start(span: Span) -> None:
    """Forward a span-start notification to the registered processor.

    Does nothing when no processor is registered (SDK disabled).
    """
    # Read the global once so a concurrent set_processor() cannot swap it
    # between the None check and the call.
    active = _processor
    if active is None:
        return
    active.on_start(span)
46
+
47
+
48
def on_span_end(span: Span) -> None:
    """Forward a span-end notification to the registered processor.

    Does nothing when no processor is registered (SDK disabled). The
    signature matches what ``Span`` expects of an ``on_end`` callback.
    """
    # Read the global once so a concurrent set_processor() cannot swap it
    # between the None check and the call.
    active = _processor
    if active is None:
        return
    active.on_end(span)
57
+
58
+
59
class SpanProcessor:
    """Turns span lifecycle notifications into entries on a SpanBuffer.

    Args:
        buffer: SpanBuffer to enqueue span dicts into.
        on_buffer_ready: Optional callback invoked after an enqueue when
            the buffer reports it is ready (e.g. to wake the exporter).
    """

    def __init__(
        self,
        buffer: SpanBuffer,
        on_buffer_ready: Callable[[], None] | None = None,
    ) -> None:
        self._buffer = buffer
        self._on_buffer_ready = on_buffer_ready

    def _maybe_notify(self) -> None:
        """Fire the buffer-ready callback when one is set and the buffer is ready.

        A callback that raises is logged at DEBUG level and ignored.
        """
        notify = self._on_buffer_ready
        if notify is None or not self._buffer.is_ready:
            return
        try:
            notify()
        except Exception:
            logger.debug("Error in buffer-ready callback", exc_info=True)

    def _enqueue_snapshot(self, span: Span, span_type: str, failure_msg: str) -> None:
        """Enqueue ``span.to_dict()`` tagged with ``span_type``; never raises.

        Any error while serializing, enqueuing, or notifying is logged at
        DEBUG level using ``failure_msg``.
        """
        try:
            snapshot = span.to_dict()
            snapshot["span_type"] = span_type
            self._buffer.enqueue(snapshot)
            self._maybe_notify()
        except Exception:
            logger.debug(failure_msg, exc_info=True)

    def on_start(self, span: Span) -> None:
        """Export a pending_span when a span starts.

        The enqueued snapshot carries span_type="pending_span" so the
        dashboard can show in-progress requests.
        """
        self._enqueue_snapshot(
            span, "pending_span", "Error in SpanProcessor.on_start"
        )

    def on_end(self, span: Span) -> None:
        """Export the final span when a span ends.

        The enqueued snapshot carries span_type="span". Suitable as
        Span's on_end callback.
        """
        self._enqueue_snapshot(span, "span", "Error in SpanProcessor.on_end")