spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/sdk/observe.py
ADDED
|
@@ -0,0 +1,1065 @@
|
|
|
1
|
+
"""spanforge.sdk.observe — SpanForge sf-observe Observability Named SDK (Phase 6).
|
|
2
|
+
|
|
3
|
+
Implements the full sf-observe API surface for Phase 6 of the SpanForge roadmap.
|
|
4
|
+
All operations run locally in-process (zero external dependencies beyond the
|
|
5
|
+
standard library) when ``config.endpoint`` is empty or the remote service is
|
|
6
|
+
unreachable and ``local_fallback_enabled`` is ``True``.
|
|
7
|
+
|
|
8
|
+
Architecture
|
|
9
|
+
------------
|
|
10
|
+
* :meth:`emit_span` is the **primary entry point** for emitting a single span.
|
|
11
|
+
It generates W3C TraceContext identifiers, applies OTel GenAI semantic
|
|
12
|
+
conventions, samples the span according to the configured strategy, and
|
|
13
|
+
routes it through :meth:`export_spans`.
|
|
14
|
+
* :meth:`export_spans` accepts a list of pre-built span dicts, enriches them
|
|
15
|
+
with OTel resource attributes, applies the configured backend exporter
|
|
16
|
+
(local buffer / OTLP / Datadog / Grafana / Splunk / Elastic), and returns
|
|
17
|
+
an :class:`~spanforge.sdk._types.ExportResult`.
|
|
18
|
+
* :meth:`add_annotation` stores a timestamped annotation in a thread-safe
|
|
19
|
+
in-memory store. :meth:`get_annotations` queries it.
|
|
20
|
+
* :meth:`get_status` returns health and session statistics.
|
|
21
|
+
|
|
22
|
+
OTel GenAI semantic conventions supported (OBS-010)
|
|
23
|
+
-----------------------------------------------------
|
|
24
|
+
All ``gen_ai.*`` attributes as defined in the OpenTelemetry GenAI specification:
|
|
25
|
+
|
|
26
|
+
* ``gen_ai.system`` — AI system (e.g. ``"openai"``)
|
|
27
|
+
* ``gen_ai.request.model`` — model identifier
|
|
28
|
+
* ``gen_ai.request.max_tokens`` — token budget
|
|
29
|
+
* ``gen_ai.request.temperature`` — temperature
|
|
30
|
+
* ``gen_ai.response.model`` — model used in the response
|
|
31
|
+
* ``gen_ai.response.id`` — response identifier
|
|
32
|
+
* ``gen_ai.response.finish_reasons`` — comma-separated finish reasons
|
|
33
|
+
* ``gen_ai.usage.input_tokens`` — prompt token count
|
|
34
|
+
* ``gen_ai.usage.output_tokens`` — completion token count
|
|
35
|
+
* ``gen_ai.operation.name`` — operation type
|
|
36
|
+
|
|
37
|
+
W3C TraceContext propagation (OBS-011, OBS-012)
|
|
38
|
+
------------------------------------------------
|
|
39
|
+
Every emitted span contains a ``traceparent`` attribute in the format::
|
|
40
|
+
|
|
41
|
+
00-<32-hex trace_id>-<16-hex span_id>-<flags>
|
|
42
|
+
|
|
43
|
+
Baggage propagation inserts ``project_id``, ``domain``, and ``tier`` when
|
|
44
|
+
present in ``attributes``.
|
|
45
|
+
|
|
46
|
+
Sampling strategies (OBS-031)
|
|
47
|
+
------------------------------
|
|
48
|
+
Configured via ``SPANFORGE_OBSERVE_SAMPLER`` environment variable:
|
|
49
|
+
|
|
50
|
+
* ``always_on`` — export every span.
|
|
51
|
+
* ``always_off`` — export no spans.
|
|
52
|
+
* ``parent_based`` — respect parent sampling bit in incoming ``traceparent``;
|
|
53
|
+
default to :attr:`~spanforge.sdk._types.SamplerStrategy.ALWAYS_ON`
|
|
54
|
+
when no parent.
|
|
55
|
+
* ``trace_id_ratio`` — deterministic fraction of traces using SHA-256 hash of
|
|
56
|
+
trace_id; ratio set by ``SPANFORGE_OBSERVE_SAMPLE_RATE``.
|
|
57
|
+
|
|
58
|
+
Backend exporters (OBS-001, OBS-040 through OBS-042)
|
|
59
|
+
-----------------------------------------------------
|
|
60
|
+
Configured via ``SPANFORGE_OBSERVE_BACKEND`` environment variable:
|
|
61
|
+
|
|
62
|
+
* ``local`` — buffer spans in a bounded in-memory deque (no network, default).
|
|
63
|
+
* ``otlp`` — POST to ``config.endpoint/v1/traces`` as OTLP JSON.
|
|
64
|
+
* ``datadog`` — POST to Datadog APM intake (``/api/v0.2/traces``).
|
|
65
|
+
* ``grafana`` — POST to Grafana Tempo ingest (``/api/v1/push``).
|
|
66
|
+
* ``splunk`` — POST to Splunk HEC (``/services/collector``).
|
|
67
|
+
* ``elastic`` — POST to Elastic APM Server (``/_bulk``).
|
|
68
|
+
|
|
69
|
+
Health probes (OBS-043)
|
|
70
|
+
------------------------
|
|
71
|
+
:attr:`healthy` is ``True`` when the last export succeeded (or no export has
|
|
72
|
+
been attempted). :attr:`last_export_at` is an ISO-8601 UTC timestamp.
|
|
73
|
+
|
|
74
|
+
Security requirements
|
|
75
|
+
---------------------
|
|
76
|
+
* API keys and signing keys are **never** logged or included in exception
|
|
77
|
+
messages.
|
|
78
|
+
* SSRF: all remote endpoints are validated with the same ``_validate_http_url``
|
|
79
|
+
guard used in the existing OTLP exporter.
|
|
80
|
+
* Thread-safety: all in-memory counters and annotation stores use locks.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
from __future__ import annotations
|
|
84
|
+
|
|
85
|
+
import hashlib
|
|
86
|
+
import ipaddress
|
|
87
|
+
import json
|
|
88
|
+
import logging
|
|
89
|
+
import os
|
|
90
|
+
import threading
|
|
91
|
+
import urllib.error
|
|
92
|
+
import urllib.parse
|
|
93
|
+
import urllib.request
|
|
94
|
+
import uuid
|
|
95
|
+
from dataclasses import dataclass, field
|
|
96
|
+
from datetime import datetime, timezone
|
|
97
|
+
from typing import Any
|
|
98
|
+
|
|
99
|
+
from spanforge.sdk._base import SFClientConfig, SFServiceClient
|
|
100
|
+
from spanforge.sdk._exceptions import (
|
|
101
|
+
SFObserveAnnotationError,
|
|
102
|
+
SFObserveEmitError,
|
|
103
|
+
SFObserveError, # noqa: F401 (re-exported for callers)
|
|
104
|
+
SFObserveExportError,
|
|
105
|
+
)
|
|
106
|
+
from spanforge.sdk._types import (
|
|
107
|
+
Annotation,
|
|
108
|
+
ExportResult,
|
|
109
|
+
ObserveStatusInfo,
|
|
110
|
+
ReceiverConfig,
|
|
111
|
+
SamplerStrategy,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
__all__ = ["SFObserveClient"]
|
|
115
|
+
|
|
116
|
+
_log = logging.getLogger(__name__)
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Constants
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
#: W3C TraceContext version byte.
|
|
123
|
+
_TRACEPARENT_VERSION = "00"
|
|
124
|
+
|
|
125
|
+
#: OTel resource attributes injected into every exported span batch (OBS-014).
|
|
126
|
+
_OTEL_RESOURCE_ATTRIBUTES: dict[str, str] = {
|
|
127
|
+
"service.name": "spanforge",
|
|
128
|
+
"service.version": "2.0.0",
|
|
129
|
+
"telemetry.sdk.language": "python",
|
|
130
|
+
"telemetry.sdk.name": "spanforge-sdk",
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
#: OTel GenAI attribute key prefix (OBS-010).
|
|
134
|
+
_GEN_AI_PREFIX = "gen_ai."
|
|
135
|
+
|
|
136
|
+
#: Recognised OTel GenAI attribute keys.
|
|
137
|
+
_GEN_AI_ATTRIBUTE_KEYS: frozenset[str] = frozenset(
|
|
138
|
+
{
|
|
139
|
+
"gen_ai.system",
|
|
140
|
+
"gen_ai.request.model",
|
|
141
|
+
"gen_ai.request.max_tokens",
|
|
142
|
+
"gen_ai.request.temperature",
|
|
143
|
+
"gen_ai.response.model",
|
|
144
|
+
"gen_ai.response.id",
|
|
145
|
+
"gen_ai.response.finish_reasons",
|
|
146
|
+
"gen_ai.usage.input_tokens",
|
|
147
|
+
"gen_ai.usage.output_tokens",
|
|
148
|
+
"gen_ai.operation.name",
|
|
149
|
+
}
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
#: Supported backend identifiers.
|
|
153
|
+
SUPPORTED_BACKENDS: frozenset[str] = frozenset(
|
|
154
|
+
{
|
|
155
|
+
"local",
|
|
156
|
+
"otlp",
|
|
157
|
+
"datadog",
|
|
158
|
+
"grafana",
|
|
159
|
+
"splunk",
|
|
160
|
+
"elastic",
|
|
161
|
+
"redis",
|
|
162
|
+
"webhook",
|
|
163
|
+
"cloud",
|
|
164
|
+
"syslog",
|
|
165
|
+
"jsonl",
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
#: Maximum spans retained in the local buffer.
|
|
170
|
+
_LOCAL_BUFFER_MAX: int = 10_000
|
|
171
|
+
|
|
172
|
+
#: W3C traceparent flag: sampled.
|
|
173
|
+
_SAMPLED_FLAG = "01"
|
|
174
|
+
|
|
175
|
+
#: W3C traceparent flag: not sampled.
|
|
176
|
+
_NOT_SAMPLED_FLAG = "00"
|
|
177
|
+
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
# SSRF guard
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _is_private_ip_literal(host: str) -> bool:
|
|
184
|
+
"""Return ``True`` if *host* is a private/loopback/link-local literal IP.
|
|
185
|
+
|
|
186
|
+
DNS hostnames are NOT resolved. Only literal IPv4/IPv6 addresses are
|
|
187
|
+
evaluated. Set ``allow_private_endpoints=True`` in non-production
|
|
188
|
+
environments when targeting private endpoints by hostname.
|
|
189
|
+
"""
|
|
190
|
+
try:
|
|
191
|
+
addr = ipaddress.ip_address(host)
|
|
192
|
+
except ValueError:
|
|
193
|
+
return False
|
|
194
|
+
return addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_multicast
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _validate_http_url(
|
|
198
|
+
url: str,
|
|
199
|
+
*,
|
|
200
|
+
allow_private_addresses: bool = False,
|
|
201
|
+
) -> None:
|
|
202
|
+
"""Raise :exc:`ValueError` if *url* is not a valid ``http://``/``https://`` URL.
|
|
203
|
+
|
|
204
|
+
Also rejects literal private IP addresses unless *allow_private_addresses*
|
|
205
|
+
is ``True``.
|
|
206
|
+
"""
|
|
207
|
+
parsed = urllib.parse.urlparse(url)
|
|
208
|
+
if parsed.scheme not in ("http", "https"):
|
|
209
|
+
raise ValueError(f"Endpoint URL must use http:// or https://; got scheme={parsed.scheme!r}")
|
|
210
|
+
host = parsed.hostname or ""
|
|
211
|
+
if not host:
|
|
212
|
+
raise ValueError(f"Endpoint URL has no host: {url!r}")
|
|
213
|
+
if not allow_private_addresses and _is_private_ip_literal(host):
|
|
214
|
+
raise ValueError(
|
|
215
|
+
f"Endpoint URL {url!r} resolves to a private/loopback address. "
|
|
216
|
+
"Set allow_private_endpoints=True for non-production use."
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# ---------------------------------------------------------------------------
|
|
221
|
+
# Internal session statistics
|
|
222
|
+
# ---------------------------------------------------------------------------
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@dataclass
|
|
226
|
+
class _ObserveSessionStats:
|
|
227
|
+
"""Mutable session counters (all accesses must hold ``_lock``)."""
|
|
228
|
+
|
|
229
|
+
span_count: int = 0
|
|
230
|
+
annotation_count: int = 0
|
|
231
|
+
export_count: int = 0
|
|
232
|
+
last_export_at: str | None = None
|
|
233
|
+
healthy: bool = True
|
|
234
|
+
_lock: threading.Lock = field(default_factory=threading.Lock, compare=False, repr=False)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
# Sampling helpers (OBS-031)
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _should_sample(
    strategy: SamplerStrategy,
    sample_rate: float,
    trace_id_hex: str,
    parent_sampled: bool | None,
) -> bool:
    """Decide whether a span is exported under the given sampling *strategy*.

    Args:
        strategy: Active :class:`~spanforge.sdk._types.SamplerStrategy`.
        sample_rate: Fraction in ``[0.0, 1.0]``; only consulted on the
            ratio-based path.
        trace_id_hex: Trace identifier; hashed on the ratio-based path.
        parent_sampled: Parent's sampling decision, or ``None`` if no parent.

    Returns:
        ``True`` when the span should be exported.
    """
    if strategy == SamplerStrategy.ALWAYS_OFF:
        return False
    if strategy == SamplerStrategy.ALWAYS_ON:
        return True
    if strategy == SamplerStrategy.PARENT_BASED:
        # Without a parent there is nothing to inherit, so sample by default.
        return True if parent_sampled is None else parent_sampled

    # Any other strategy takes the ratio path: map the trace ID onto [0, 1]
    # via the first 64 bits of its SHA-256 digest, so the same trace always
    # gets the same decision.
    digest_prefix = hashlib.sha256(trace_id_hex.encode()).hexdigest()[:16]
    position = int(digest_prefix, 16) / 0xFFFF_FFFF_FFFF_FFFF
    return position < sample_rate
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ---------------------------------------------------------------------------
|
|
272
|
+
# W3C TraceContext helpers (OBS-011)
|
|
273
|
+
# ---------------------------------------------------------------------------
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def make_traceparent(trace_id_hex: str, span_id_hex: str, *, sampled: bool = True) -> str:
    """Build a W3C ``traceparent`` header value.

    Args:
        trace_id_hex: 32-character hex string (128-bit trace ID).
        span_id_hex: 16-character hex string (64-bit span ID).
        sampled: Whether the sampling flag should be set.

    Returns:
        A string of the form ``"00-<trace_id>-<span_id>-<flags>"``.

    Raises:
        ValueError: If the IDs are not valid hex strings of the expected length.
    """
    if len(trace_id_hex) != 32:  # noqa: PLR2004
        raise ValueError(f"trace_id_hex must be 32 hex chars; got {len(trace_id_hex)}")
    if len(span_id_hex) != 16:  # noqa: PLR2004
        raise ValueError(f"span_id_hex must be 16 hex chars; got {len(span_id_hex)}")

    # Check character by character: ``int(value, 16)`` also accepts "0x"
    # prefixes, signs, surrounding whitespace and underscores, none of which
    # may appear in a traceparent field.
    hex_digits = frozenset("0123456789abcdefABCDEF")
    if not hex_digits.issuperset(trace_id_hex):
        raise ValueError("trace_id_hex must contain only hexadecimal digits")
    if not hex_digits.issuperset(span_id_hex):
        raise ValueError("span_id_hex must contain only hexadecimal digits")

    flags = _SAMPLED_FLAG if sampled else _NOT_SAMPLED_FLAG
    return f"{_TRACEPARENT_VERSION}-{trace_id_hex}-{span_id_hex}-{flags}"
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def extract_traceparent(traceparent: str) -> tuple[str, str, bool]:
    """Parse a W3C ``traceparent`` header value.

    Args:
        traceparent: Header value of the form
            ``"<version>-<trace_id>-<span_id>-<flags>"``.

    Returns:
        A 3-tuple of ``(trace_id_hex, span_id_hex, sampled)``.  ``sampled``
        reflects the least-significant bit of the flags field.

    Raises:
        ValueError: If *traceparent* does not conform to the W3C spec
            (wrong number of parts, wrong field length, non-hex characters,
            or the invalid version ``ff``).
    """
    parts = traceparent.split("-")
    _expected_parts = 4
    if len(parts) != _expected_parts:
        raise ValueError(
            f"traceparent must have 4 '-'-separated parts; got {len(parts)}: {traceparent!r}"
        )
    version, trace_id, span_id, flags = parts
    if len(trace_id) != 32:  # noqa: PLR2004
        raise ValueError(f"trace_id must be 32 hex chars; got {len(trace_id)}")
    if len(span_id) != 16:  # noqa: PLR2004
        raise ValueError(f"span_id must be 16 hex chars; got {len(span_id)}")

    hex_digits = frozenset("0123456789abcdefABCDEF")
    if not hex_digits.issuperset(trace_id):
        raise ValueError("trace_id must contain only hexadecimal digits")
    if not hex_digits.issuperset(span_id):
        raise ValueError("span_id must contain only hexadecimal digits")
    if len(version) != 2 or not hex_digits.issuperset(version) or version.lower() == "ff":  # noqa: PLR2004
        raise ValueError(f"traceparent version must be 2 hex chars other than 'ff'; got {version!r}")
    if len(flags) != 2 or not hex_digits.issuperset(flags):  # noqa: PLR2004
        raise ValueError(f"traceparent flags must be 2 hex chars; got {flags!r}")

    # trace-flags is a bit field: only bit 0 carries the sampling decision,
    # so values such as "03" still mean "sampled".
    sampled = bool(int(flags, 16) & 0x01)
    return trace_id, span_id, sampled
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def _generate_trace_id() -> str:
|
|
325
|
+
"""Return a random 32-hex trace ID."""
|
|
326
|
+
return uuid.uuid4().hex + uuid.uuid4().hex[:0] # 32 hex chars from uuid4
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _generate_span_id() -> str:
|
|
330
|
+
"""Return a random 16-hex span ID."""
|
|
331
|
+
return uuid.uuid4().hex[:16]
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# ---------------------------------------------------------------------------
|
|
335
|
+
# OTel span builder (OBS-010, OBS-014, OBS-015)
|
|
336
|
+
# ---------------------------------------------------------------------------
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _build_otel_span(
    name: str,
    attributes: dict[str, Any],
    trace_id: str,
    span_id: str,
    *,
    sampled: bool = True,
) -> dict[str, Any]:
    """Construct an OTLP-compatible span dict.

    Copies the caller's attributes unchanged (``gen_ai.*`` keys included),
    then adds ``otel.status_code``, ``traceparent`` and, when applicable,
    ``baggage``.  The span is marked as an error when
    ``attributes["status"] == "error"`` or
    ``attributes["otel.status_code"] == "ERROR"`` (case-insensitive, OBS-015).

    Args:
        name: Span name.
        attributes: User-supplied span attributes.  Not mutated.
        trace_id: 32-hex trace identifier.
        span_id: 16-hex span identifier.
        sampled: Whether to set the W3C sampled flag.

    Returns:
        A span dict with ``name``, ``traceId``, ``spanId``, ``traceparent``,
        ``startTimeUnixNano``, ``endTimeUnixNano``, ``status``, ``attributes``,
        and ``resource`` fields.  Start and end time are the same instant.

    Raises:
        ValueError: Propagated from :func:`make_traceparent` when an ID is
            malformed.
    """
    now_ns = int(datetime.now(timezone.utc).timestamp() * 1_000_000_000)

    # Work on a copy so the caller's dict is left untouched (OBS-010).
    span_attrs: dict[str, Any] = dict(attributes)

    # OBS-015: Error span detection
    is_error = (
        str(attributes.get("status", "")).lower() == "error"
        or str(attributes.get("otel.status_code", "")).upper() == "ERROR"
    )
    if is_error:
        span_attrs["otel.status_code"] = "ERROR"
        status_code = "STATUS_CODE_ERROR"
        status_message = str(attributes.get("exception.message", "error"))
    else:
        # Keep a caller-supplied non-error status code; default to OK.
        span_attrs.setdefault("otel.status_code", "OK")
        status_code = "STATUS_CODE_OK"
        status_message = ""

    # W3C TraceContext (OBS-011)
    traceparent = make_traceparent(trace_id, span_id, sampled=sampled)
    span_attrs["traceparent"] = traceparent

    # W3C Baggage (OBS-012) — project_id, domain, tier.  Values are
    # percent-encoded so that ",", ";", "=" or whitespace inside a value
    # cannot break the comma-separated list-member syntax.
    baggage_parts = []
    for key in ("project_id", "domain", "tier"):
        if key in attributes:
            encoded_value = urllib.parse.quote(str(attributes[key]), safe="")
            baggage_parts.append(f"{key}={encoded_value}")
    if baggage_parts:
        span_attrs["baggage"] = ",".join(baggage_parts)

    return {
        "name": name,
        "traceId": trace_id,
        "spanId": span_id,
        "traceparent": traceparent,
        "startTimeUnixNano": now_ns,
        "endTimeUnixNano": now_ns,
        "status": {"code": status_code, "message": status_message},
        "attributes": span_attrs,
        "resource": {
            "attributes": {
                **_OTEL_RESOURCE_ATTRIBUTES,
                "deployment.environment": os.environ.get("SPANFORGE_ENV", "production"),
            }
        },
    }
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# ---------------------------------------------------------------------------
|
|
416
|
+
# Backend exporters (OBS-040 through OBS-042)
|
|
417
|
+
# ---------------------------------------------------------------------------
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _post_json(
    url: str,
    payload: dict[str, Any],
    headers: dict[str, str],
    *,
    timeout_seconds: float = 30.0,
) -> None:
    """POST *payload* as JSON to *url* and discard the response body.

    Args:
        url: Destination URL.
        payload: JSON-serialisable mapping; values ``json`` cannot encode
            natively are converted with ``str``.
        headers: Extra request headers, added after ``Content-Type``.
        timeout_seconds: Timeout passed to ``urlopen``.

    Raises:
        SFObserveExportError: On any HTTP or network error, including
            protocol-level failures such as a malformed status line or a
            truncated response.
    """
    # Local import: ``http.client`` exceptions are only referenced here.
    import http.client

    body = json.dumps(payload, default=str).encode()
    req = urllib.request.Request(url, data=body, method="POST")  # noqa: S310
    req.add_header("Content-Type", "application/json")
    for name, value in headers.items():
        req.add_header(name, value)
    try:
        with urllib.request.urlopen(req, timeout=timeout_seconds) as resp:  # noqa: S310  # nosec B310
            _ = resp.read()
    except urllib.error.HTTPError as exc:
        # Must come before the OSError clause: HTTPError is a URLError,
        # which is itself an OSError.
        raise SFObserveExportError(f"HTTP {exc.code} from {url}: {exc.reason}") from exc
    except (OSError, http.client.HTTPException) as exc:
        # HTTPException (BadStatusLine, IncompleteRead, ...) is not an
        # OSError; without this it would escape unwrapped and bypass callers
        # that only handle SFObserveExportError.
        raise SFObserveExportError(f"Network error posting to {url}: {exc}") from exc
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _build_otlp_payload(
|
|
447
|
+
spans: list[dict[str, Any]],
|
|
448
|
+
) -> dict[str, Any]:
|
|
449
|
+
"""Wrap *spans* in an OTLP ``/v1/traces`` JSON envelope."""
|
|
450
|
+
span_list: list[dict[str, Any]] = []
|
|
451
|
+
for s in spans:
|
|
452
|
+
attrs = [
|
|
453
|
+
{"key": k, "value": {"stringValue": str(v)}} for k, v in s.get("attributes", {}).items()
|
|
454
|
+
]
|
|
455
|
+
resource_attrs = [
|
|
456
|
+
{"key": k, "value": {"stringValue": str(v)}}
|
|
457
|
+
for k, v in s.get("resource", {}).get("attributes", {}).items()
|
|
458
|
+
]
|
|
459
|
+
span_list.append(
|
|
460
|
+
{
|
|
461
|
+
"traceId": s.get("traceId", ""),
|
|
462
|
+
"spanId": s.get("spanId", ""),
|
|
463
|
+
"name": s.get("name", ""),
|
|
464
|
+
"startTimeUnixNano": str(s.get("startTimeUnixNano", 0)),
|
|
465
|
+
"endTimeUnixNano": str(s.get("endTimeUnixNano", 0)),
|
|
466
|
+
"status": s.get("status", {}),
|
|
467
|
+
"attributes": attrs,
|
|
468
|
+
}
|
|
469
|
+
)
|
|
470
|
+
return {
|
|
471
|
+
"resourceSpans": [
|
|
472
|
+
{
|
|
473
|
+
"resource": {"attributes": resource_attrs if spans else []},
|
|
474
|
+
"scopeSpans": [
|
|
475
|
+
{
|
|
476
|
+
"scope": {"name": "spanforge-sdk"},
|
|
477
|
+
"spans": span_list,
|
|
478
|
+
}
|
|
479
|
+
],
|
|
480
|
+
}
|
|
481
|
+
]
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
# ---------------------------------------------------------------------------
|
|
486
|
+
# SFObserveClient
|
|
487
|
+
# ---------------------------------------------------------------------------
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
class SFObserveClient(SFServiceClient):
|
|
491
|
+
"""SpanForge sf-observe client.
|
|
492
|
+
|
|
493
|
+
Provides span emission, annotation storage, and export routing for the
|
|
494
|
+
Phase 6 observability SDK.
|
|
495
|
+
|
|
496
|
+
Configuration is read from :class:`~spanforge.sdk._base.SFClientConfig`
|
|
497
|
+
and the following additional environment variables:
|
|
498
|
+
|
|
499
|
+
+-----------------------------------+-----------------------------------+-------------------+
|
|
500
|
+
| Variable | Meaning | Default |
|
|
501
|
+
+===================================+===================================+===================+
|
|
502
|
+
| ``SPANFORGE_OBSERVE_BACKEND`` | Exporter backend | ``"local"`` |
|
|
503
|
+
+-----------------------------------+-----------------------------------+-------------------+
|
|
504
|
+
| ``SPANFORGE_OBSERVE_SAMPLER`` | SamplerStrategy label | ``"always_on"`` |
|
|
505
|
+
+-----------------------------------+-----------------------------------+-------------------+
|
|
506
|
+
| ``SPANFORGE_OBSERVE_SAMPLE_RATE`` | Float ``[0.0, 1.0]`` for ratio | ``1.0`` |
|
|
507
|
+
+-----------------------------------+-----------------------------------+-------------------+
|
|
508
|
+
| ``SPANFORGE_ENV`` | ``deployment.environment`` value | ``"production"`` |
|
|
509
|
+
+-----------------------------------+-----------------------------------+-------------------+
|
|
510
|
+
|
|
511
|
+
Thread safety
|
|
512
|
+
-------------
|
|
513
|
+
All public methods are thread-safe. The annotation store and session
|
|
514
|
+
statistics are protected by ``threading.Lock``.
|
|
515
|
+
|
|
516
|
+
Example::
|
|
517
|
+
|
|
518
|
+
from spanforge.sdk import sf_observe
|
|
519
|
+
|
|
520
|
+
span_id = sf_observe.emit_span(
|
|
521
|
+
"chat.completion",
|
|
522
|
+
{
|
|
523
|
+
"gen_ai.system": "openai",
|
|
524
|
+
"gen_ai.request.model": "gpt-4o",
|
|
525
|
+
"gen_ai.usage.input_tokens": 512,
|
|
526
|
+
},
|
|
527
|
+
)
|
|
528
|
+
annotation_id = sf_observe.add_annotation(
|
|
529
|
+
"model_deployed",
|
|
530
|
+
{"model": "gpt-4o", "version": "2024-11"},
|
|
531
|
+
project_id="my-project",
|
|
532
|
+
)
|
|
533
|
+
status = sf_observe.get_status()
|
|
534
|
+
"""
|
|
535
|
+
|
|
536
|
+
def __init__(self, config: SFClientConfig) -> None:
    """Initialise the client from *config* and the observe environment variables.

    Reads ``SPANFORGE_OBSERVE_BACKEND``, ``SPANFORGE_OBSERVE_SAMPLER`` and
    ``SPANFORGE_OBSERVE_SAMPLE_RATE``.  An unrecognised or unparsable value
    is replaced by its default (``"local"``, ``always_on``, ``1.0``) and a
    warning is logged, so a misconfiguration is visible rather than silent.

    Args:
        config: Shared SDK client configuration, forwarded to the base class.
    """
    super().__init__(config, service_name="observe")

    # Resolve backend (case-insensitive).
    raw_backend = os.environ.get("SPANFORGE_OBSERVE_BACKEND", "local").lower()
    if raw_backend in SUPPORTED_BACKENDS:
        self._backend: str = raw_backend
    else:
        _log.warning(
            "Unknown SPANFORGE_OBSERVE_BACKEND=%r; defaulting to local",
            raw_backend,
        )
        self._backend = "local"

    # Resolve sampler strategy
    raw_sampler = os.environ.get("SPANFORGE_OBSERVE_SAMPLER", SamplerStrategy.ALWAYS_ON.value)
    try:
        self._sampler_strategy: SamplerStrategy = SamplerStrategy(raw_sampler)
    except ValueError:
        _log.warning(
            "Unknown SPANFORGE_OBSERVE_SAMPLER=%r; defaulting to always_on",
            raw_sampler,
        )
        self._sampler_strategy = SamplerStrategy.ALWAYS_ON

    # Resolve sample rate (for TRACE_ID_RATIO), clamped into [0.0, 1.0].
    raw_rate = os.environ.get("SPANFORGE_OBSERVE_SAMPLE_RATE", "1.0")
    try:
        self._sample_rate: float = max(0.0, min(1.0, float(raw_rate)))
    except ValueError:
        _log.warning(
            "Invalid SPANFORGE_OBSERVE_SAMPLE_RATE=%r; defaulting to 1.0",
            raw_rate,
        )
        self._sample_rate = 1.0

    # Thread-safe annotation store and session stats
    self._annotations: list[Annotation] = []
    self._annotations_lock = threading.Lock()
    self._stats = _ObserveSessionStats()

    # Local span buffer, guarded by its own lock.
    self._span_buffer: list[dict[str, Any]] = []
    self._buffer_lock = threading.Lock()
|
|
569
|
+
|
|
570
|
+
# ------------------------------------------------------------------
|
|
571
|
+
# OBS-001: export_spans
|
|
572
|
+
# ------------------------------------------------------------------
|
|
573
|
+
|
|
574
|
+
def export_spans(
    self,
    spans: list[dict[str, Any]],
    *,
    receiver_config: ReceiverConfig | None = None,
) -> ExportResult:
    """Export a batch of spans to the configured backend.

    Each span dict should be an OTLP-compatible dict as produced by
    :meth:`emit_span`, or any dict with at least ``"name"`` and
    ``"traceId"`` fields.

    If the export raises and ``config.local_fallback_enabled`` is true, the
    batch is kept in the in-memory span buffer instead. Only as many spans
    as still fit under ``_LOCAL_BUFFER_MAX`` are kept (the newest of the
    batch), so repeated failures cannot grow the buffer without bound. The
    batch is still reported as exported in that case.

    Args:
        spans:           List of span dicts to export.
        receiver_config: Optional per-call override for the export
                         endpoint and headers.  When provided, this
                         takes precedence over
                         ``config.endpoint`` for this call only.

    Returns:
        :class:`~spanforge.sdk._types.ExportResult` with counts and
        backend label.

    Raises:
        SFObserveExportError: If *spans* is not a list, or if the export
            fails and ``config.local_fallback_enabled`` is ``False``.
    """
    if not isinstance(spans, list):
        raise SFObserveExportError(f"spans must be a list; got {type(spans).__name__}")

    exported_at = datetime.now(timezone.utc).isoformat()

    try:
        exported_count, failed_count = self._do_export(spans, receiver_config)
    except SFObserveExportError:
        if not self._config.local_fallback_enabled:
            with self._stats._lock:
                self._stats.healthy = False
            raise
        # fallback: buffer locally
        _log.warning(
            "sf-observe: export to %s failed; buffering %d spans locally",
            self._backend,
            len(spans),
        )
        with self._buffer_lock:
            # Respect what is already buffered. Guard the zero case
            # explicitly because ``spans[-0:]`` would select the whole batch.
            free_slots = max(0, _LOCAL_BUFFER_MAX - len(self._span_buffer))
            if free_slots:
                self._span_buffer.extend(spans[-free_slots:])
        exported_count = len(spans)
        failed_count = 0

    with self._stats._lock:
        self._stats.export_count += 1
        self._stats.last_export_at = exported_at
        self._stats.healthy = failed_count == 0

    return ExportResult(
        exported_count=exported_count,
        failed_count=failed_count,
        backend=self._backend,
        exported_at=exported_at,
    )
|
|
637
|
+
|
|
638
|
+
def _do_export(
    self,
    spans: list[dict[str, Any]],
    receiver_config: ReceiverConfig | None,
) -> tuple[int, int]:
    """Internal export dispatch.

    Routing order:

    1. An empty batch returns ``(0, 0)`` immediately.
    2. A per-call *receiver_config* posts an OTLP payload to its endpoint.
    3. The ``"local"`` backend (or local mode) appends to the in-memory
       span buffer, never exceeding ``_LOCAL_BUFFER_MAX`` entries.
    4. Every other backend builds its payload and posts it to a path under
       ``config.endpoint``.

    Args:
        spans:           Span dicts to export.
        receiver_config: Per-call endpoint override, or ``None``.

    Returns:
        ``(exported_count, failed_count)`` tuple.

    Raises:
        SFObserveExportError: On backend failure.
    """
    if not spans:
        return 0, 0

    def _buffer_locally() -> None:
        # Clamp at zero: a negative slice bound would otherwise accept almost
        # the whole batch when the buffer is already over capacity.
        with self._buffer_lock:
            free_slots = max(0, _LOCAL_BUFFER_MAX - len(self._span_buffer))
            self._span_buffer.extend(spans[:free_slots])
        dropped = len(spans) - min(len(spans), free_slots)
        if dropped:
            _log.warning(
                "sf-observe: local span buffer full; dropped %d spans",
                dropped,
            )

    backend = self._backend

    # Per-call override switches to OTLP regardless of global backend.
    if receiver_config is not None:
        _validate_http_url(receiver_config.endpoint)
        payload = _build_otlp_payload(spans)
        headers: dict[str, str] = dict(receiver_config.headers)
        api_key = self._config.api_key.get_secret_value()
        if api_key:
            # setdefault: an Authorization header supplied by the caller wins.
            headers.setdefault("Authorization", f"Bearer {api_key}")
        _post_json(
            receiver_config.endpoint,
            payload,
            headers,
            timeout_seconds=receiver_config.timeout_seconds,
        )
        return len(spans), 0

    # Global backend selection
    if backend == "local" or self._is_local_mode():
        _buffer_locally()
        return len(spans), 0

    endpoint = self._config.endpoint.rstrip("/")
    api_key = self._config.api_key.get_secret_value()
    base_headers: dict[str, str] = {}
    if api_key:
        base_headers["Authorization"] = f"Bearer {api_key}"

    if backend == "otlp":
        _validate_http_url(endpoint + "/v1/traces")
        payload = _build_otlp_payload(spans)
        _post_json(endpoint + "/v1/traces", payload, base_headers)

    elif backend == "datadog":
        _validate_http_url(endpoint + "/api/v0.2/traces")
        dd_payload: dict[str, Any] = {"traces": [[_span_to_dd(s) for s in spans]]}
        _post_json(endpoint + "/api/v0.2/traces", dd_payload, base_headers)

    elif backend == "grafana":
        _validate_http_url(endpoint + "/api/v1/push")
        payload = _build_otlp_payload(spans)
        _post_json(endpoint + "/api/v1/push", payload, base_headers)

    elif backend == "splunk":
        # Splunk HEC (OBS-040)
        _validate_http_url(endpoint + "/services/collector")
        events = [{"event": s, "sourcetype": "spanforge:otel"} for s in spans]
        splunk_payload: dict[str, Any] = {"events": events}
        _post_json(endpoint + "/services/collector", splunk_payload, base_headers)

    elif backend == "elastic":
        # Elastic APM / OpenSearch ECS (OBS-041)
        _validate_http_url(endpoint + "/_bulk")
        lines: list[dict[str, Any]] = []
        for s in spans:
            # Each document is preceded by its index action entry.
            lines.append({"index": {"_index": "apm-spans"}})
            lines.append(_span_to_ecs(s))
        elastic_payload: dict[str, Any] = {"operations": lines}
        _post_json(endpoint + "/_bulk", elastic_payload, base_headers)

    elif backend == "redis":
        # Redis Streams backend — POST to a Redis-over-HTTP bridge (OBS-042)
        _validate_http_url(endpoint + "/xadd")
        redis_payload: dict[str, Any] = {
            "stream": "spanforge:spans",
            "entries": [{"span": s} for s in spans],
        }
        _post_json(endpoint + "/xadd", redis_payload, base_headers)

    elif backend == "webhook":
        # Generic webhook backend (OBS-043)
        _validate_http_url(endpoint)
        webhook_payload: dict[str, Any] = {
            "source": "spanforge",
            "spans": spans,
        }
        _post_json(endpoint, webhook_payload, base_headers)

    elif backend == "cloud":
        # Cloud spans backend — OTLP-compatible cloud collector (OBS-044)
        _validate_http_url(endpoint + "/v1/traces")
        payload = _build_otlp_payload(spans)
        _post_json(endpoint + "/v1/traces", payload, base_headers)

    elif backend == "syslog":
        # Syslog/SIEM backend — POST as CEF/syslog JSON (OBS-045)
        _validate_http_url(endpoint + "/syslog")
        syslog_payload: dict[str, Any] = {
            "format": "cef",
            "events": [{"cef": _span_to_cef(s)} for s in spans],
        }
        _post_json(endpoint + "/syslog", syslog_payload, base_headers)

    elif backend == "jsonl":
        # JSONL stream backend — POST newline-delimited JSON (OBS-046)
        _validate_http_url(endpoint + "/ingest")
        jsonl_body = "\n".join(json.dumps(s, default=str) for s in spans)
        jsonl_payload: dict[str, Any] = {"data": jsonl_body}
        _post_json(endpoint + "/ingest", jsonl_payload, base_headers)

    else:
        # Unknown backend — local fallback, subject to the same buffer cap
        _buffer_locally()

    return len(spans), 0
|
|
764
|
+
|
|
765
|
+
# ------------------------------------------------------------------
|
|
766
|
+
# OBS-004: emit_span
|
|
767
|
+
# ------------------------------------------------------------------
|
|
768
|
+
|
|
769
|
+
def emit_span(
    self,
    name: str,
    attributes: dict[str, Any],
) -> str:
    """Create one span, apply the sampling decision, and export it.

    A trace ID is taken from a valid ``"traceparent"`` attribute when one is
    supplied (OBS-011); otherwise a new one is generated. A span ID is always
    generated. If the sampler rejects the span nothing is exported, but the
    span is still counted and its ID is still returned. Sampled spans are
    built with ``_build_otel_span`` and sent through :meth:`export_spans`.

    Args:
        name:       Span name (e.g. ``"chat.completion"``); must be non-empty.
        attributes: Span attributes dict. An unparsable ``"traceparent"``
                    value is ignored with a debug log entry.

    Returns:
        The generated span ID string.

    Raises:
        SFObserveEmitError: If *name* is empty, *attributes* is not a dict,
            or the underlying export raises ``SFObserveExportError``.
    """
    if not name:
        raise SFObserveEmitError("span name must not be empty")
    if not isinstance(attributes, dict):
        raise SFObserveEmitError(f"attributes must be a dict; got {type(attributes).__name__}")

    # Parent context, when the caller supplied one (OBS-011)
    inherited_trace_id: str | None = None
    inherited_sampled: bool | None = None
    if "traceparent" in attributes:
        header_value = str(attributes["traceparent"])
        try:
            inherited_trace_id, _parent_span_id, inherited_sampled = extract_traceparent(
                header_value
            )
        except ValueError:
            _log.debug("emit_span: invalid parent traceparent — ignoring")

    # Identifiers: reuse the parent's trace, always mint a new span ID
    trace_id = inherited_trace_id or _generate_trace_id()
    span_id = _generate_span_id()

    # Sampling decision (OBS-031)
    keep = _should_sample(
        self._sampler_strategy,
        self._sample_rate,
        trace_id,
        inherited_sampled,
    )

    if keep:
        # Build OTLP span dict (OBS-010, OBS-014, OBS-015) and export it
        otel_span = _build_otel_span(name, attributes, trace_id, span_id, sampled=True)
        try:
            self.export_spans([otel_span])
        except SFObserveExportError as exc:
            raise SFObserveEmitError(f"export failed: {exc}") from exc

    # Sampled and unsampled spans are both counted.
    with self._stats._lock:
        self._stats.span_count += 1

    return span_id
|
|
839
|
+
|
|
840
|
+
# ------------------------------------------------------------------
|
|
841
|
+
# OBS-002: add_annotation
|
|
842
|
+
# ------------------------------------------------------------------
|
|
843
|
+
|
|
844
|
+
def add_annotation(
    self,
    event_type: str,
    payload: dict[str, Any],
    *,
    project_id: str,
) -> str:
    """Record a timestamped annotation in the in-memory store.

    The annotation receives a freshly generated UUID and a UTC ISO-8601
    creation timestamp, is appended to the annotation list under its lock,
    and the session annotation counter is incremented.

    Args:
        event_type: Category label (e.g. ``"model_deployed"``); must be
            non-empty.
        payload:    Key/value metadata; must be a dict.
        project_id: Project scope stored with the annotation.

    Returns:
        The new annotation's ID (UUID string).

    Raises:
        SFObserveAnnotationError: If *event_type* is empty or *payload*
            is not a dict.
    """
    if not event_type:
        raise SFObserveAnnotationError("event_type must not be empty")
    if not isinstance(payload, dict):
        raise SFObserveAnnotationError(f"payload must be a dict; got {type(payload).__name__}")

    new_id = str(uuid.uuid4())
    stamp = datetime.now(timezone.utc).isoformat()
    record = Annotation(
        annotation_id=new_id,
        event_type=event_type,
        payload=payload,
        project_id=project_id,
        created_at=stamp,
    )

    with self._annotations_lock:
        self._annotations.append(record)

    with self._stats._lock:
        self._stats.annotation_count += 1

    return new_id
|
|
884
|
+
|
|
885
|
+
# ------------------------------------------------------------------
|
|
886
|
+
# OBS-003: get_annotations
|
|
887
|
+
# ------------------------------------------------------------------
|
|
888
|
+
|
|
889
|
+
def get_annotations(
    self,
    event_type: str,
    from_dt: str,
    to_dt: str,
    *,
    project_id: str = "",
) -> list[Annotation]:
    """Query stored annotations by type and time range.

    Timestamps without a UTC offset are interpreted as UTC, and a trailing
    ``Z`` is accepted as a synonym for ``+00:00``. This keeps the range
    comparison against the timezone-aware stored ``created_at`` values from
    raising ``TypeError``.

    Args:
        event_type: Category label to filter by.  Pass ``"*"`` to match
                    all event types.
        from_dt:    ISO-8601 UTC start timestamp (inclusive).
        to_dt:      ISO-8601 UTC end timestamp (inclusive).
        project_id: Optional project scope filter.  Empty string disables
                    project filtering.

    Returns:
        Matching :class:`~spanforge.sdk._types.Annotation` instances, in
        the order they were stored.

    Raises:
        SFObserveAnnotationError: If *from_dt* or *to_dt* are not
            valid ISO-8601 strings.
    """

    def _parse_utc(raw: str) -> datetime:
        # ``datetime.fromisoformat`` only accepts the "Z" suffix from
        # Python 3.11 onwards, so normalise it first.
        text = raw[:-1] + "+00:00" if raw.endswith(("Z", "z")) else raw
        moment = datetime.fromisoformat(text)
        if moment.tzinfo is None:
            # The documented contract is UTC; make it explicit so aware and
            # naive values are never compared.
            moment = moment.replace(tzinfo=timezone.utc)
        return moment

    try:
        _from = _parse_utc(from_dt)
        _to = _parse_utc(to_dt)
    except (AttributeError, TypeError, ValueError) as exc:
        # AttributeError/TypeError cover non-string arguments.
        raise SFObserveAnnotationError(f"Invalid datetime string: {exc}") from exc

    results: list[Annotation] = []
    with self._annotations_lock:
        for ann in self._annotations:
            if event_type not in ("*", ann.event_type):
                continue
            if project_id and ann.project_id != project_id:
                continue
            try:
                created = _parse_utc(ann.created_at)
            except (AttributeError, TypeError, ValueError):
                # Skip records whose stored timestamp cannot be parsed.
                continue
            if _from <= created <= _to:
                results.append(ann)
    return results
|
|
935
|
+
|
|
936
|
+
# ------------------------------------------------------------------
|
|
937
|
+
# emit_span_async (F-10)
|
|
938
|
+
# ------------------------------------------------------------------
|
|
939
|
+
|
|
940
|
+
async def emit_span_async(
    self,
    name: str,
    attributes: dict[str, Any],
) -> str:
    """Async variant of :meth:`emit_span`.

    Runs :meth:`emit_span` in the running loop's default executor so the
    event loop is not blocked while the span is exported.

    Args:
        name:       Span name (e.g. ``"chat.completion"``).
        attributes: Span attributes dict.

    Returns:
        The span ID string returned by :meth:`emit_span`.

    Raises:
        Whatever :meth:`emit_span` raises; the exception propagates through
        the awaited future.
    """
    import asyncio

    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it.
    loop = asyncio.get_running_loop()
    # run_in_executor forwards positional arguments itself, so no
    # functools.partial wrapper is needed.
    return await loop.run_in_executor(None, self.emit_span, name, attributes)
|
|
965
|
+
|
|
966
|
+
# ------------------------------------------------------------------
|
|
967
|
+
# get_status
|
|
968
|
+
# ------------------------------------------------------------------
|
|
969
|
+
|
|
970
|
+
def get_status(self) -> ObserveStatusInfo:
    """Build a snapshot of service health and session counters.

    ``dropped_count`` and ``circuit_open`` are filled from
    ``spanforge._batch_exporter.get_aggregate_health`` only when it reports
    at least one exporter; any failure while gathering them leaves the
    not-yet-assigned values as ``None``. The remaining fields are read from
    the session stats while holding the stats lock.

    Returns:
        :class:`~spanforge.sdk._types.ObserveStatusInfo` snapshot.
    """
    # Optional enrichment from the batch exporters; stays None when absent.
    dropped: int | None = None
    circuit_open: bool | None = None
    try:
        from spanforge._batch_exporter import get_aggregate_health

        health = get_aggregate_health()
        has_exporters = int(health["exporter_count"]) > 0
        if has_exporters:
            dropped = int(health["total_dropped"])
            circuit_open = bool(health["any_circuit_open"])
    except Exception:  # nosec B110 — optional enrichment only
        pass

    stats = self._stats
    with stats._lock:
        label = "ok" if stats.healthy else "degraded"
        return ObserveStatusInfo(
            status=label,
            backend=self._backend,
            sampler_strategy=self._sampler_strategy.value,
            span_count=stats.span_count,
            annotation_count=stats.annotation_count,
            export_count=stats.export_count,
            last_export_at=stats.last_export_at,
            healthy=stats.healthy,
            dropped_count=dropped,
            circuit_open=circuit_open,
        )
|
|
1002
|
+
|
|
1003
|
+
# ------------------------------------------------------------------
|
|
1004
|
+
# OBS-043: health probes
|
|
1005
|
+
# ------------------------------------------------------------------
|
|
1006
|
+
|
|
1007
|
+
@property
def healthy(self) -> bool:
    """Current value of the session ``healthy`` flag, read under the stats lock."""
    stats = self._stats
    with stats._lock:
        flag = stats.healthy
    return flag
|
|
1012
|
+
|
|
1013
|
+
@property
def last_export_at(self) -> str | None:
    """Timestamp recorded by the most recent export, read under the stats lock."""
    stats = self._stats
    with stats._lock:
        stamp = stats.last_export_at
    return stamp
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
# ---------------------------------------------------------------------------
|
|
1021
|
+
# Backend-specific span serialisation helpers
|
|
1022
|
+
# ---------------------------------------------------------------------------
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
def _span_to_dd(span: dict[str, Any]) -> dict[str, Any]:
    """Translate an OTLP span dict to a minimal Datadog trace payload.

    Args:
        span: Span dict. Reads ``traceId``, ``spanId``, ``name``,
            ``startTimeUnixNano``, ``endTimeUnixNano``, ``status`` and
            ``attributes``; every key is optional.

    Returns:
        Dict with ``trace_id``, ``span_id``, ``name``, ``start``,
        ``duration``, ``error`` and ``meta`` keys. ``start`` and ``duration``
        are integers in nanoseconds.
    """

    def _as_int(value: Any, fallback: int) -> int:
        # Nanosecond fields may arrive as decimal strings; coerce them.
        try:
            return int(value)
        except (TypeError, ValueError):
            return fallback

    now_ns = int(datetime.now(timezone.utc).timestamp() * 1_000_000_000)

    # Datadog trace IDs are 64-bit: use the low 64 bits (the last 16 hex
    # characters) of the 128-bit OTel trace ID.
    trace_hex = span.get("traceId", "0" * 32) or "0"
    span_hex = span.get("spanId", "0" * 16) or "0"

    start_ns = _as_int(span.get("startTimeUnixNano"), now_ns)
    # Without an end timestamp the duration stays 0, as before.
    end_ns = _as_int(span.get("endTimeUnixNano"), start_ns)

    status = span.get("status") or {}
    return {
        "trace_id": int(trace_hex[-16:], 16),
        "span_id": int(span_hex, 16),
        "name": span.get("name", ""),
        "start": start_ns,
        "duration": max(0, end_ns - start_ns),
        "error": 1 if status.get("code") == "STATUS_CODE_ERROR" else 0,
        "meta": {str(k): str(v) for k, v in span.get("attributes", {}).items()},
    }
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def _span_to_ecs(span: dict[str, Any]) -> dict[str, Any]:
    """Map an OTLP span dict onto a small Elastic Common Schema document.

    Args:
        span: Span dict. Reads ``traceId``, ``spanId``, ``name``,
            ``resource.attributes["service.name"]``, ``attributes`` and
            ``status.code``; every key is optional.

    Returns:
        Dict with ECS-style dotted keys. ``@timestamp`` is the current UTC
        time at conversion, not a time taken from the span.
    """
    trace_id = span.get("traceId", "")
    span_id = span.get("spanId", "")
    span_name = span.get("name", "")

    resource_attrs = span.get("resource", {}).get("attributes", {})
    service = resource_attrs.get("service.name", "spanforge")

    # Label keys and values are both stringified.
    labels = {str(key): str(value) for key, value in span.get("attributes", {}).items()}

    if span.get("status", {}).get("code") == "STATUS_CODE_ERROR":
        outcome = "failure"
    else:
        outcome = "success"

    document: dict[str, Any] = {
        "trace.id": trace_id,
        "transaction.id": span_id,
        "span.name": span_name,
        "service.name": service,
        "labels": labels,
        "event.outcome": outcome,
    }
    document["@timestamp"] = datetime.now(timezone.utc).isoformat()
    return document
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def _span_to_cef(span: dict[str, Any]) -> str:
    """Translate an OTLP span dict to a minimal CEF (Common Event Format) string.

    The span name goes into the pipe-delimited CEF header and the trace/span
    IDs into the ``key=value`` extension. Both are escaped so delimiter
    characters inside the values cannot corrupt or inject CEF fields.

    Args:
        span: Span dict. Reads ``status.code``, ``name``, ``traceId`` and
            ``spanId``; every key is optional.

    Returns:
        A single-line CEF record. Severity is ``7`` when the status code is
        ``"STATUS_CODE_ERROR"`` and ``3`` otherwise.
    """

    def _header(value: Any) -> str:
        # Header fields: escape backslash first, then pipe; keep the record
        # on a single line.
        text = str(value).replace("\\", "\\\\").replace("|", "\\|")
        return text.replace("\r", " ").replace("\n", " ")

    def _extension(value: Any) -> str:
        # Extension values: escape backslash and equals; encode line breaks
        # as the two-character sequence "\n".
        text = str(value).replace("\\", "\\\\").replace("=", "\\=")
        return text.replace("\r\n", "\\n").replace("\r", "\\n").replace("\n", "\\n")

    status = span.get("status") or {}
    severity = "7" if status.get("code") == "STATUS_CODE_ERROR" else "3"
    name = _header(span.get("name", "span"))
    trace_id = _extension(span.get("traceId", ""))
    span_id = _extension(span.get("spanId", ""))
    return (
        f"CEF:0|SpanForge|spanforge-sdk|1.0|span|{name}|{severity}|"
        f"traceId={trace_id} spanId={span_id}"
    )
|