threadify-sdk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,318 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ if TYPE_CHECKING:
8
+ from opentelemetry.sdk.trace import ReadableSpan
9
+
10
+ from threadify.connection import Connection
11
+
12
# Module-level logger; failures while translating spans are reported here.
logger = logging.getLogger("threadify.otel")

# Span status codes (OpenTelemetry proto).
# Only _STATUS_ERROR is referenced by the code visible in this module; it is
# the raw-integer fallback used when the StatusCode enum cannot be imported.
_STATUS_UNSET = 0
_STATUS_OK = 1
_STATUS_ERROR = 2

# Attempt to inherit from SpanExporter when OTel is installed.
# This makes isinstance() checks in BatchSpanProcessor work correctly.
# When OTel is missing the base falls back to ``object`` so that importing
# this module still succeeds.
try:
    from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporterBase
except ImportError:
    _SpanExporterBase = object  # type: ignore[misc, assignment]
25
+
26
+
27
def _require_otel() -> None:
    """Ensure the OpenTelemetry SDK can be imported.

    Raises:
        ImportError: If ``opentelemetry.sdk.trace.export`` cannot be imported.
            The message includes a pip install hint and the original import
            failure is chained as the cause.
    """
    install_hint = (
        "OpenTelemetry is required for ThreadifySpanExporter. "
        "Install it: pip install opentelemetry-api opentelemetry-sdk"
    )
    try:
        import opentelemetry.sdk.trace.export  # noqa: F401
    except ImportError as missing:
        raise ImportError(install_hint) from missing
36
+
37
+
38
class ThreadifySpanExporter(_SpanExporterBase):
    """OpenTelemetry SpanExporter that auto-translates Spans into Threadify Threads/Steps.

    Each exported span is recorded as one Threadify step. Spans sharing a
    trace id are recorded on the same thread, and span events become
    sub-steps. A span without a parent is treated as the root of its trace:
    once it has been recorded the thread is completed (or closed on failure).

    Span attributes interpreted by the exporter (never copied into the step):

    * ``threadify.thread_id`` -- join this existing thread instead of starting one.
    * ``threadify.role`` -- role used when joining (default ``"participant"``).
    * ``threadify.contract`` / ``threadify.label`` -- used when starting a thread.
    * ``threadify.service`` -- service name (default: the connection's).
    * ``threadify.step_name`` -- step name (default: the span name).

    Remaining attributes are stringified and recorded as context, except:

    * ``threadify.ref.<key>`` is recorded as ref ``<key>``;
    * keys listed in the ``refs`` option are recorded as refs;
    * ``threadify.context.<key>`` is recorded as context ``<key>``.

    Usage::

        from opentelemetry import trace
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor
        from threadify import Threadify

        conn = await Threadify.connect("api-key", service_name="my-service", ...)
        thread = await conn.start(contract_name="order_flow")

        exporter = thread.create_span_exporter(options={"refs": ["orderId"]})

        provider = TracerProvider()
        provider.add_span_processor(BatchSpanProcessor(exporter))
        trace.set_tracer_provider(provider)
    """

    # Attribute keys that steer the exporter and are not forwarded to the step.
    _DIRECTIVE_KEYS = frozenset(
        {
            "threadify.thread_id",
            "threadify.contract",
            "threadify.label",
            "threadify.step_name",
            "threadify.role",
            "threadify.service",
        }
    )
    _REF_PREFIX = "threadify.ref."
    _CONTEXT_PREFIX = "threadify.context."
    # Seconds after which a cached trace -> thread entry is dropped even if
    # no root span was ever seen for that trace.
    _TRACE_TTL_SECONDS = 600

    def __init__(self, connection: Connection, options: dict[str, Any] | None = None):
        """Create an exporter bound to *connection*.

        Args:
            connection: Threadify connection used to start/join threads.
            options: Optional settings. ``refs`` may be a collection of
                attribute keys (each recorded as a ref under the same key) or
                a mapping ``{attribute_key: ref_key}``.

        Raises:
            ImportError: If OpenTelemetry is not installed.
        """
        _require_otel()
        self._connection = connection
        self._options = options or {}

        # Normalise refs to a mapping {attribute_key: ref_key}
        refs = self._options.get("refs", [])
        if isinstance(refs, dict):
            self._refs_map: dict[str, str] = dict(refs)
        elif isinstance(refs, (list, tuple, set, frozenset)):
            self._refs_map = {k: k for k in refs}
        else:
            self._refs_map = {}

        # trace_id -> asyncio.Future[ThreadInstance]
        self._trace_threads: dict[str, asyncio.Future[Any]] = {}

        # Capture the event loop so we can schedule coroutines from sync export().
        # When constructed outside a running loop there is nothing to capture
        # and export() will report failure.
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            self._loop = None

    def export(
        self, spans: list[ReadableSpan], timeout_millis: float = 30000
    ) -> Any:
        """Export a batch of spans.

        Called by the OpenTelemetry ``BatchSpanProcessor`` (typically from a
        worker thread). We schedule the async work on the captured event loop
        and return immediately; per-span failures are logged, not reported
        through the return value.

        Returns:
            ``SpanExportResult.SUCCESS`` once the work has been scheduled, or
            ``SpanExportResult.FAILURE`` when the connection is not open or no
            usable event loop was captured.
        """
        if not self._connection.is_connected:
            return self._make_result(1, "Threadify connection is not open")

        if self._loop is None or self._loop.is_closed():
            return self._make_result(1, "No running event loop available")

        asyncio.run_coroutine_threadsafe(self._process_all(spans), self._loop)
        return self._make_result(0)

    def force_flush(self, timeout_millis: float = 30000) -> bool:
        """No-op — spans are sent immediately."""
        return True

    def shutdown(self) -> None:
        """No-op — connection lifecycle is managed externally."""
        return None

    # --- internals ---

    async def _process_all(self, spans: list[ReadableSpan]) -> None:
        """Process *spans* one after another, in the order given."""
        for span in spans:
            await self._process_span(span)

    async def _process_span(self, span: ReadableSpan) -> None:
        """Process one span, logging (not propagating) any failure."""
        try:
            await self._do_process_span(span)
        except Exception:
            logger.exception("Failed to process span")

    async def _do_process_span(self, span: ReadableSpan) -> None:
        """Translate *span* into a Threadify step and send it."""
        ctx = span.span_context
        trace_id = format(ctx.trace_id, "032x")
        span_id = format(ctx.span_id, "016x")
        thread = await self._get_or_start_thread(span, trace_id)

        step_name = self._span_attr(span, "threadify.step_name") or span.name
        service_name = self._span_attr(span, "threadify.service")
        step = thread.step(step_name, service_name or self._connection.service_name)

        # The OTel ids are always recorded; attribute-derived refs are applied
        # afterwards so they win on a key clash, as before.
        context, attr_refs = self._split_attributes(span)
        refs: dict[str, str] = {
            "otel_trace_id": trace_id,
            "otel_span_id": span_id,
        }
        refs.update(attr_refs)

        if context:
            step.add_context(context)
        step.add_refs(refs)

        # Map timing (OTel uses nanoseconds since epoch)
        if span.start_time:
            step._event["startedAt"] = _ns_to_iso(span.start_time)
        if span.end_time:
            step._event["finishedAt"] = _ns_to_iso(span.end_time)

        # Map span events to sub-steps
        for event in span.events or ():
            recorded_at = (
                _ns_to_iso(event.timestamp) if event.timestamp else _now_iso()
            )
            payload: dict[str, Any] = dict(event.attributes) if event.attributes else {}
            recorded_before = len(step._sub_steps)
            step.sub_step(name=event.name, data=payload, status="success")
            # sub_step() may reject the event without appending anything; only
            # stamp the timestamp on a sub-step this event actually created.
            if len(step._sub_steps) > recorded_before:
                step._sub_steps[-1].recorded_at = recorded_at

        # Map status
        target_status = STATUS_SUCCESS
        message = ""
        if span.status:
            message = span.status.description or ""
            if self._is_error_status(span.status):
                target_status = STATUS_FAILED

        if target_status == STATUS_SUCCESS:
            await step.success(message or "")
        else:
            await step.failed(message or "Span ended with error status")

        # Root span auto-complete
        if not getattr(span, "parent", None):
            if target_status == STATUS_SUCCESS:
                await thread.complete("Root span completed successfully")
            else:
                await thread.close("Root span failed")
            # Clean up the trace map since the trace is finished
            self._trace_threads.pop(trace_id, None)

    def _split_attributes(
        self, span: ReadableSpan
    ) -> tuple[dict[str, str], dict[str, str]]:
        """Sort the span's attributes into ``(context, refs)`` string mappings."""
        context: dict[str, str] = {}
        refs: dict[str, str] = {}
        for key, value in (span.attributes or {}).items():
            # Skip internal threadify directives
            if key in self._DIRECTIVE_KEYS:
                continue

            str_value = str(value)
            # Only the leading prefix is stripped; a later occurrence of the
            # same text inside the key is part of the key.
            if key.startswith(self._REF_PREFIX):
                refs[key[len(self._REF_PREFIX):]] = str_value
            elif key in self._refs_map:
                refs[self._refs_map[key]] = str_value
            elif key.startswith(self._CONTEXT_PREFIX):
                context[key[len(self._CONTEXT_PREFIX):]] = str_value
            else:
                context[key] = str_value
        return context, refs

    @staticmethod
    def _is_error_status(status: Any) -> bool:
        """Return True when *status* carries the OpenTelemetry ERROR code."""
        try:
            from opentelemetry.trace.status import StatusCode

            return status.status_code is StatusCode.ERROR
        except Exception:
            # Defensive: fallback to raw int if enum isn't available
            return getattr(status.status_code, "value", 0) == _STATUS_ERROR

    async def _get_or_start_thread(self, span: ReadableSpan, trace_id: str) -> Any:
        """Get or create a ThreadInstance for this trace.

        The first caller for a trace id resolves the thread; callers arriving
        while that is in flight await the same future. A failed resolution is
        not cached, so a later span of the same trace can retry.
        """
        pending = self._trace_threads.get(trace_id)
        if pending is not None:
            return await pending

        loop = asyncio.get_running_loop()
        fut: asyncio.Future[Any] = loop.create_future()
        self._trace_threads[trace_id] = fut

        try:
            thread = await self._resolve_thread(span, trace_id)
        except BaseException as exc:
            self._discard_trace(trace_id, fut)
            if isinstance(exc, Exception):
                fut.set_exception(exc)
                # The error is re-raised to our caller below; mark it as
                # retrieved so asyncio does not warn when nobody else awaits.
                fut.exception()
            else:
                # Cancellation and the like: release any concurrent waiters.
                fut.cancel()
            raise

        fut.set_result(thread)
        # Memory-leak safety: remove after 10 minutes
        loop.call_later(self._TRACE_TTL_SECONDS, self._discard_trace, trace_id, fut)
        return thread

    async def _resolve_thread(self, span: ReadableSpan, trace_id: str) -> Any:
        """Join or start the thread that *span*'s trace should be recorded on."""
        role = self._span_attr(span, "threadify.role") or "participant"

        existing_thread_id = self._span_attr(span, "threadify.thread_id")
        if existing_thread_id:
            return await self._connection.join(existing_thread_id, role)

        # Try to find an existing thread via GraphQL. This is best-effort: any
        # failure here falls through to starting a fresh thread.
        try:
            archived = await self._connection.get_thread_by_ref(
                "otel_trace_id", trace_id
            )
            if archived:
                logger.debug(
                    "Found existing thread %s via GraphQL, joining...",
                    archived.id,
                )
                return await self._connection.join(archived.id, role)
        except Exception:
            logger.debug(
                "Could not reuse an existing thread for trace %s; starting a new one",
                trace_id,
                exc_info=True,
            )

        contract_name = self._span_attr(span, "threadify.contract")
        label = self._span_attr(span, "threadify.label") or span.name
        service_name = (
            self._span_attr(span, "threadify.service")
            or self._connection.service_name
        )
        return await self._connection.start(
            label=label,
            contract_name=contract_name or "",
            service_name=service_name,
        )

    def _discard_trace(self, trace_id: str, fut: asyncio.Future[Any]) -> None:
        """Forget *trace_id* if it is still mapped to *fut*.

        The identity check keeps a stale timer from evicting a newer entry
        created for the same trace id.
        """
        if self._trace_threads.get(trace_id) is fut:
            del self._trace_threads[trace_id]

    @staticmethod
    def _span_attr(span: ReadableSpan, key: str) -> str | None:
        """Return attribute *key* of *span* as a string, or None if absent."""
        value = (span.attributes or {}).get(key)
        return str(value) if value is not None else None

    @staticmethod
    def _make_result(code: int, error: str | None = None) -> Any:
        """Build an OpenTelemetry ExportResult-compatible object.

        ``code == 0`` maps to ``SpanExportResult.SUCCESS``; anything else maps
        to ``SpanExportResult.FAILURE``.
        """
        try:
            from opentelemetry.sdk.trace.export import SpanExportResult

            if code == 0:
                return SpanExportResult.SUCCESS
            return SpanExportResult.FAILURE
        except Exception:
            # Fallback for environments without OTel installed at runtime
            return {"code": code, "error": error}
297
+
298
+
299
+ # --- helpers ---
300
+
301
# Threadify step statuses that span outcomes are mapped to. They are defined
# after the exporter class but resolved at call time, so the order is fine.
# NOTE(review): presumably these mirror the same-named constants in
# threadify.models — confirm they stay in sync.
STATUS_SUCCESS = "success"
STATUS_FAILED = "failed"
303
+
304
+
305
def _ns_to_iso(nanoseconds: int) -> str:
    """Format nanoseconds since the Unix epoch as a UTC timestamp string.

    The result looks like ``YYYY-MM-DDTHH:MM:SS.nnnnnnnnnZ``: the fractional
    part always has nine digits, so nanosecond precision is kept.
    """
    from datetime import datetime, timezone

    whole_seconds, fraction_ns = divmod(nanoseconds, 1_000_000_000)
    moment = datetime.fromtimestamp(whole_seconds, tz=timezone.utc)
    return f"{moment:%Y-%m-%dT%H:%M:%S}.{fraction_ns:09d}Z"
313
+
314
+
315
def _now_iso() -> str:
    """Return the current UTC time in ``datetime.isoformat()`` form.

    The string carries a ``+00:00`` offset rather than a ``Z`` suffix.
    """
    from datetime import datetime, timezone

    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
threadify/step.py ADDED
@@ -0,0 +1,312 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from threadify.thread import ThreadInstance
7
+
8
+ from threadify.models import (
9
+ ACTION_RECORD_THREAD_EVENT,
10
+ FIELD_ACTION,
11
+ FIELD_CONTEXT,
12
+ FIELD_FINISHED_AT,
13
+ FIELD_IDEMPOTENCY_KEY,
14
+ FIELD_IS_DUPLICATE,
15
+ FIELD_MESSAGE,
16
+ FIELD_REFS,
17
+ FIELD_SERVICE_NAME,
18
+ FIELD_STARTED_AT,
19
+ FIELD_STATUS,
20
+ FIELD_STEP_NAME,
21
+ FIELD_SUB_STEPS,
22
+ FIELD_THREAD_ID,
23
+ FIELD_THREADIFY_METADATA,
24
+ STATUS_ERROR,
25
+ STATUS_FAILED,
26
+ STATUS_IN_PROGRESS,
27
+ STATUS_SUCCESS,
28
+ StepResult,
29
+ SubStepData,
30
+ first_non_empty,
31
+ now_iso,
32
+ )
33
+
34
+
35
class ThreadStep:
    """Fluent builder for recording step events.

    Builder methods return ``self`` so calls can be chained. A validation
    failure in a builder method is not raised immediately: it is remembered,
    all further builder calls become no-ops, and the error is raised when a
    status method (:meth:`success`, :meth:`failed`, :meth:`error`,
    :meth:`stop`) is awaited.

    Usage::

        step = thread.step("order_placed")
        result = await (
            step
            .add_context({"orderId": "ORD-123", "amount": 99.99})
            .add_refs({"stripe_id": "pi_abc"})
            .success("Order placed!")
        )
    """

    def __init__(
        self,
        step_name: str,
        thread: ThreadInstance,
        service_name: str,
    ):
        """Create a step named *step_name* on *thread*.

        The start time is stamped at construction and the status starts out
        as in-progress.
        """
        self._step_name = step_name
        self._thread = thread
        self._service_name = service_name

        self._manual_idempotency_key: str = ""
        self._sub_steps: list[SubStepData] = []
        self._context: dict[str, str] = {}
        self._refs: dict[str, str] = {}
        self._metadata: dict[str, Any] | None = None
        # First validation error raised by a builder method, if any.
        self._error: Exception | None = None

        self._event: dict[str, Any] = {
            FIELD_ACTION: ACTION_RECORD_THREAD_EVENT,
            FIELD_THREAD_ID: thread.thread_id,
            FIELD_STEP_NAME: step_name,
            FIELD_STARTED_AT: now_iso(),
            FIELD_FINISHED_AT: None,
            FIELD_STATUS: STATUS_IN_PROGRESS,
            FIELD_SERVICE_NAME: service_name,
        }

    # --- Fluent builder methods ---

    def idempotency_key(self, key: str) -> ThreadStep:
        """Set a manual idempotency key for deduplication.

        A blank key is recorded as a ``ValueError`` to be raised on stop.
        """
        if self._error is not None:
            return self
        if not key or not key.strip():
            self._error = ValueError("idempotency key must be a non-empty string")
            return self
        self._manual_idempotency_key = key
        return self

    def add_context(self, data: dict[str, Any] | None) -> ThreadStep:
        """Add business context data to this step.

        All values are converted to strings to match the server schema.
        """
        if self._error is not None:
            return self
        if data:
            for k, v in data.items():
                self._context[k] = str(v)
        return self

    def add_private_context(self, data: dict[str, Any] | None) -> ThreadStep:
        """Add private context data.

        Each stringified value is stored under both its plain key and a
        ``private_``-prefixed copy of that key.
        """
        if self._error is not None:
            return self
        if data:
            for k, v in data.items():
                s = str(v)
                self._context[k] = s
                self._context[f"private_{k}"] = s
        return self

    def add_refs(self, refs: dict[str, str] | None) -> ThreadStep:
        """Add external system references (values are stored as given)."""
        if self._error is not None:
            return self
        if refs:
            self._refs.update(refs)
        return self

    def sub_step(
        self,
        name: str,
        data: dict[str, Any] | None = None,
        status: str = "success",
    ) -> ThreadStep:
        """Record a sub-step within this step.

        Args:
            name: Sub-step name; must be non-blank.
            data: Optional payload data.
            status: Must be 'success' or 'failed'.

        An invalid name or status is recorded as a ``ValueError`` to be raised
        on stop, and no sub-step is appended.
        """
        if self._error is not None:
            return self
        if not name or not name.strip():
            self._error = ValueError("sub-step name must be a non-empty string")
            return self
        if status not in (STATUS_SUCCESS, STATUS_FAILED):
            self._error = ValueError(
                f'sub-step status must be either "{STATUS_SUCCESS}" or "{STATUS_FAILED}"'
            )
            return self

        self._sub_steps.append(
            SubStepData(
                name=name,
                status=status,
                payload=data,
            )
        )
        return self

    # --- Status methods ---

    async def stop(
        self, status: str = STATUS_SUCCESS, message_or_data: str | dict | None = None
    ) -> StepResult:
        """Stop the step with an explicit status and optional message/data.

        This is the generic status method; prefer :meth:`success`,
        :meth:`failed`, or :meth:`error` for clarity.
        """
        return await self._stop(status, message_or_data)

    async def success(self, message_or_data: str | dict | None = None) -> StepResult:
        """Mark the step as successful and send it."""
        return await self._stop(STATUS_SUCCESS, message_or_data)

    async def failed(self, message_or_data: str | dict | None = None) -> StepResult:
        """Mark the step as failed and send it."""
        return await self._stop(STATUS_FAILED, message_or_data)

    async def error(self, message_or_data: str | dict | None = None) -> StepResult:
        """Mark the step as error and send it."""
        return await self._stop(STATUS_ERROR, message_or_data)

    async def _stop(self, status: str, message_or_data: str | dict | None = None) -> StepResult:
        """Finalise the step and send the event.

        Args:
            status: Final status written into the event.
            message_or_data: A non-empty string is stored as the metadata
                message; a non-empty dict is merged into the metadata.

        Returns:
            A :class:`StepResult`; it is flagged ``duplicate=True`` when the
            send was rejected as a duplicate.

        Raises:
            ValueError: A validation error remembered from a builder method.
            RuntimeError: The thread has no id, or the server rejected the
                event for a reason other than duplication.
        """
        if self._error is not None:
            raise self._error

        # Respect a finish time that was written into the event before the
        # step was stopped; only stamp "now" when none is present.
        # NOTE(review): the OpenTelemetry exporter in this package writes the
        # span end time to the "finishedAt" key — presumably that is
        # FIELD_FINISHED_AT; confirm against threadify.models.
        if not self._event.get(FIELD_FINISHED_AT):
            self._event[FIELD_FINISHED_AT] = now_iso()
        self._event[FIELD_STATUS] = status
        self._event[FIELD_CONTEXT] = self._context
        self._event[FIELD_REFS] = self._refs

        # Handle optional message/data.
        if message_or_data is not None:
            if self._metadata is None:
                self._metadata = {}
            if isinstance(message_or_data, str) and message_or_data:
                self._metadata[FIELD_MESSAGE] = message_or_data
            elif isinstance(message_or_data, dict) and message_or_data:
                self._metadata.update(message_or_data)

        if self._metadata:
            self._event[FIELD_THREADIFY_METADATA] = self._metadata

        # Attach sub-steps.
        if self._sub_steps:
            self._event[FIELD_SUB_STEPS] = [
                {
                    "name": ss.name,
                    "status": ss.status,
                    "payload": ss.payload,
                    "recordedAt": ss.recorded_at,
                }
                for ss in self._sub_steps
            ]

        # Generate idempotency key.
        self._event[FIELD_IDEMPOTENCY_KEY] = self._generate_idempotency_key()

        # Send event. A duplicate is reported through the result, not raised.
        duplicate = False
        try:
            await self._send_event()
        except DuplicateStepError:
            duplicate = True

        result_fields: dict[str, Any] = {
            "step_name": self._step_name,
            "thread_id": self._thread.thread_id,
            "status": status,
            "idempotency_key": self._event.get(FIELD_IDEMPOTENCY_KEY, ""),
            "timestamp": first_non_empty(
                self._event.get(FIELD_FINISHED_AT, ""),
                self._event.get(FIELD_STARTED_AT, ""),
            ),
        }
        # Only pass the flag for duplicates so StepResult's own default
        # applies otherwise.
        if duplicate:
            result_fields["duplicate"] = True
        return StepResult(**result_fields)

    async def _send_event(self) -> dict[str, Any]:
        """Transmit the event and wait for a response.

        Raises:
            RuntimeError: The thread has no id, or the response status is not
                success.
            DuplicateStepError: The response is flagged as a duplicate.
        """
        if not self._thread.thread_id:
            raise RuntimeError("Thread not started")

        await self._thread._send(self._event)

        # The response is matched on the action field only.
        resp = await self._thread._conn._wait_response(
            lambda m: m.get(FIELD_ACTION) == ACTION_RECORD_THREAD_EVENT
        )

        if resp.get(FIELD_STATUS) != STATUS_SUCCESS:
            msg = resp.get(FIELD_MESSAGE, "failed to record step event")
            if resp.get(FIELD_IS_DUPLICATE):
                raise DuplicateStepError(msg)
            raise RuntimeError(msg)

        return resp

    def _generate_idempotency_key(self) -> str:
        """Generate an FNV-1a idempotency key from step name + context.

        A manually set key takes precedence. Otherwise the key is the
        8-hex-digit FNV-1a hash of the step name followed by the context
        rendered as a key-sorted, JSON-like object string.
        """
        if self._manual_idempotency_key:
            return self._manual_idempotency_key

        # Build sorted JSON string of context. Keys and values are inserted
        # verbatim (no JSON escaping); changing this would change the keys.
        sorted_items = sorted(self._context.items())
        context_json = "{" + ",".join(f'"{k}":"{v}"' for k, v in sorted_items) + "}"

        input_str = self._step_name + context_json
        h = _fnv1a_32(input_str.encode("utf-8"))
        return f"{h:08x}"

    # --- Read-only accessors ---

    @property
    def step_name(self) -> str:
        """Name of this step."""
        return self._step_name

    @property
    def status(self) -> str:
        """Current status recorded in the event."""
        return self._event.get(FIELD_STATUS, STATUS_IN_PROGRESS)

    @property
    def context(self) -> dict[str, str]:
        """Return a copy of the context collected so far."""
        return dict(self._context)

    @property
    def metadata(self) -> dict[str, Any] | None:
        """Return the current metadata dict (or None if unset)."""
        return dict(self._metadata) if self._metadata is not None else None

    def get_event_data(self) -> dict[str, Any]:
        """Return a copy of the current event data (for debugging)."""
        return dict(self._event)
294
+
295
+
296
class DuplicateStepError(Exception):
    """Signals that a step event was reported as a duplicate."""
300
+
301
+
302
def is_duplicate_error(error: Exception) -> bool:
    """Tell whether *error* is a :class:`DuplicateStepError` (or a subclass)."""
    is_duplicate = isinstance(error, DuplicateStepError)
    return is_duplicate
304
+
305
+
306
def _fnv1a_32(data: bytes) -> int:
    """FNV-1a 32-bit hash — matches the JS SDK implementation.

    Starts from the 32-bit offset basis and, for every input byte, XORs the
    byte in and then multiplies by the FNV prime, truncating to 32 bits.
    """
    fnv_prime = 0x01000193
    mask_32 = 0xFFFFFFFF
    digest = 0x811C9DC5  # 32-bit offset basis
    for octet in data:
        digest = ((digest ^ octet) * fnv_prime) & mask_32
    return digest