spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,444 @@
1
+ """spanforge.integrations.groq — Auto-instrumentation for the Groq Python SDK.
2
+
3
+ This module monkey-patches the Groq client so every
4
+ ``client.chat.completions.create(...)`` call automatically populates the
5
+ active :class:`~spanforge._span.Span` with:
6
+
7
+ * :class:`~spanforge.namespaces.trace.TokenUsage` (input / output token counts)
8
+ * :class:`~spanforge.namespaces.trace.ModelInfo` (provider = ``groq``, name
9
+ from response)
10
+ * :class:`~spanforge.namespaces.trace.CostBreakdown` (computed from the static
11
+ pricing table below)
12
+
13
+ The Groq SDK mirrors the OpenAI API surface, so the response object has the
14
+ same ``usage.prompt_tokens`` / ``usage.completion_tokens`` fields.
15
+ Additionally, Groq exposes per-request timing via ``usage.total_time``
16
+ (seconds). Use :func:`get_duration_ms` to extract the API-measured latency
17
+ from a response object.
18
+
19
+ Usage::
20
+
21
+ from spanforge.integrations import groq as groq_integration
22
+ groq_integration.patch()
23
+
24
+ from groq import Groq
25
+ client = Groq()
26
+
27
+ import spanforge
28
+ spanforge.configure(exporter="console")
29
+
30
+ with spanforge.span("groq-chat", model="llama3-70b-8192") as span:
31
+ resp = client.chat.completions.create(
32
+ model="llama3-70b-8192",
33
+ messages=[{"role": "user", "content": "Hello"}],
34
+ )
35
+ # → span.token_usage and span.cost auto-populated on exit
36
+
37
+ Calling ``patch()`` is **idempotent** — calling it multiple times has no
38
+ effect. Call :func:`unpatch` to restore the original methods.
39
+
40
+ Install with::
41
+
42
+ pip install "spanforge[groq]"
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import functools
48
+ from typing import Any
49
+
50
+ from spanforge.namespaces.trace import (
51
+ CostBreakdown,
52
+ GenAISystem,
53
+ ModelInfo,
54
+ TokenUsage,
55
+ )
56
+
57
# Names exported by ``from spanforge.integrations.groq import *``.
# ``list_models`` is a public, documented helper of this module, so it is
# exported together with the other entry points.
__all__ = [
    "get_duration_ms",
    "is_patched",
    "list_models",
    "normalize_response",
    "patch",
    "unpatch",
]
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Static pricing table (USD per million tokens, effective 2026-03-04)
67
+ # ---------------------------------------------------------------------------
68
+
69
#: Effective date (ISO ``YYYY-MM-DD``) of the prices listed in ``GROQ_PRICING``.
PRICING_DATE: str = "2026-03-04"

#: Groq model pricing — USD per million tokens, keyed by model name.
#: Each entry holds the ``"input"`` (prompt) and ``"output"`` (completion) rate.
GROQ_PRICING: dict[str, dict[str, float]] = {
    # LLaMA 3.3
    "llama-3.3-70b-versatile": {"input": 0.59, "output": 0.79},
    "llama-3.3-70b-specdec": {"input": 0.59, "output": 0.99},
    # LLaMA 3.1
    "llama-3.1-70b-versatile": {"input": 0.59, "output": 0.79},
    "llama-3.1-8b-instant": {"input": 0.05, "output": 0.08},
    "llama-3.1-405b-reasoning": {"input": 3.00, "output": 3.00},
    # LLaMA 3.2
    "llama-3.2-1b-preview": {"input": 0.04, "output": 0.04},
    "llama-3.2-3b-preview": {"input": 0.06, "output": 0.06},
    "llama-3.2-11b-vision-preview": {"input": 0.18, "output": 0.18},
    "llama-3.2-90b-vision-preview": {"input": 0.90, "output": 0.90},
    # LLaMA 3 (legacy names)
    "llama3-70b-8192": {"input": 0.59, "output": 0.79},
    "llama3-8b-8192": {"input": 0.05, "output": 0.08},
    "llama3-groq-70b-8192-tool-use-preview": {"input": 0.89, "output": 0.89},
    "llama3-groq-8b-8192-tool-use-preview": {"input": 0.19, "output": 0.19},
    # Mixtral
    "mixtral-8x7b-32768": {"input": 0.24, "output": 0.24},
    # Gemma
    "gemma-7b-it": {"input": 0.07, "output": 0.07},
    "gemma2-9b-it": {"input": 0.20, "output": 0.20},
}
156
+
157
+ # Name of the sentinel attribute kept on the imported ``groq`` module while the patch is active; patch(), unpatch() and is_patched() all look it up via getattr().
158
+ _PATCH_FLAG = "_spanforge_patched"
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Public API
163
+ # ---------------------------------------------------------------------------
164
+
165
+
166
def patch() -> None:
    """Monkey-patch the Groq client to auto-instrument all chat completions.

    Wraps both ``groq.resources.chat.completions.Completions.create``
    (sync) and ``AsyncCompletions.create`` (async).  Each wrapper calls the
    original method, passes the result to :func:`_auto_populate_span`
    (which updates the currently active span, if there is one) and returns
    the response unchanged.  The original methods are kept on their classes
    as ``_spanforge_orig_create`` so that :func:`unpatch` can restore them.

    This function is **idempotent** — once the patch flag is present on the
    ``groq`` module, further calls return immediately.

    Raises:
        ImportError: If the ``groq`` package is not installed.
    """
    groq_mod = _require_groq()

    if getattr(groq_mod, _PATCH_FLAG, False):
        return  # already patched

    # --- sync ----------------------------------------------------------------
    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            Completions,  # type: ignore[import-untyped]
        )

        _orig_sync = Completions.create  # type: ignore[attr-defined]

        @functools.wraps(_orig_sync)
        def _patched_sync(self: Any, *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
            response = _orig_sync(self, *args, **kwargs)
            _auto_populate_span(response)
            return response

        # Remember the original first so it can always be restored once the
        # wrapper is in place.
        Completions._spanforge_orig_create = _orig_sync  # type: ignore[attr-defined]
        Completions.create = _patched_sync  # type: ignore[method-assign]
    except (ImportError, AttributeError):  # pragma: no cover
        # SDK layout differs from what is expected here: leave sync untouched.
        pass

    # --- async ---------------------------------------------------------------
    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            AsyncCompletions,  # type: ignore[import-untyped]
        )

        _orig_async = AsyncCompletions.create  # type: ignore[attr-defined]

        @functools.wraps(_orig_async)
        async def _patched_async(self: Any, *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
            response = await _orig_async(self, *args, **kwargs)
            _auto_populate_span(response)
            return response

        AsyncCompletions._spanforge_orig_create = _orig_async  # type: ignore[attr-defined]
        AsyncCompletions.create = _patched_async  # type: ignore[method-assign]
    except (ImportError, AttributeError):  # pragma: no cover
        # Same best-effort policy as for the sync client.
        pass

    # Set the flag through the shared constant so it cannot drift from the
    # ``getattr(groq_mod, _PATCH_FLAG, ...)`` checks used elsewhere.
    setattr(groq_mod, _PATCH_FLAG, True)
223
+
224
+
225
def unpatch() -> None:
    """Restore the original Groq methods and remove the patch flag.

    Reinstates the ``create`` methods saved as ``_spanforge_orig_create`` on
    ``Completions`` and ``AsyncCompletions``, removes those saved references,
    and finally deletes the patch flag from the ``groq`` module.

    Safe to call even if :func:`patch` was never called: without the flag
    the function returns immediately.

    Raises:
        ImportError: If the ``groq`` package is not installed.
    """
    groq_mod = _require_groq()

    if not getattr(groq_mod, _PATCH_FLAG, False):
        return  # nothing to do

    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            Completions,  # type: ignore[import-untyped]
        )

        Completions.create = Completions._spanforge_orig_create  # type: ignore[attr-defined,method-assign]
        del Completions._spanforge_orig_create  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        # Nothing was saved for the sync client (or it cannot be imported).
        pass

    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            AsyncCompletions,  # type: ignore[import-untyped]
        )

        AsyncCompletions.create = AsyncCompletions._spanforge_orig_create  # type: ignore[attr-defined,method-assign]
        del AsyncCompletions._spanforge_orig_create  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        # Nothing was saved for the async client (or it cannot be imported).
        pass

    # Remove the flag through the shared constant, mirroring the getattr()
    # check above instead of repeating the attribute name literally.
    try:  # noqa: SIM105
        delattr(groq_mod, _PATCH_FLAG)
    except AttributeError:  # pragma: no cover
        pass
262
+
263
+
264
def is_patched() -> bool:
    """Report whether spanforge's patch flag is set on the ``groq`` module.

    Returns:
        ``True`` when the flag is present and truthy; ``False`` otherwise,
        including when the ``groq`` package cannot be imported.
    """
    try:
        flag = getattr(_require_groq(), _PATCH_FLAG, False)
    except ImportError:
        return False
    return bool(flag)
274
+
275
+
276
def normalize_response(
    response: Any,  # noqa: ANN401
) -> tuple[TokenUsage, ModelInfo, CostBreakdown]:
    """Extract structured observability data from a Groq chat completion.

    The Groq SDK mirrors the OpenAI response structure, so token fields
    follow the same ``prompt_tokens`` / ``completion_tokens`` naming.
    Groq additionally provides per-request timing in ``usage.total_time``
    (seconds); use :func:`get_duration_ms` to extract it separately.

    Missing or falsy fields are tolerated: absent token counts become ``0``,
    an absent or empty ``usage.total_tokens`` falls back to
    ``input_tokens + output_tokens``, and an absent model name becomes
    ``"unknown"``.

    Args:
        response: A Groq ``ChatCompletion`` (or compatible object).

    Returns:
        A 3-tuple of ``(TokenUsage, ModelInfo, CostBreakdown)``.

    Field mapping:

    +-----------------------------+------------------------------+
    | Groq field                  | SpanForge field              |
    +=============================+==============================+
    | ``response.model``          | ``ModelInfo.name``           |
    | ``usage.prompt_tokens``     | ``TokenUsage.input_tokens``  |
    | ``usage.completion_tokens`` | ``TokenUsage.output_tokens`` |
    | ``usage.total_tokens``      | ``TokenUsage.total_tokens``  |
    +-----------------------------+------------------------------+
    """
    # ------------------------------------------------------------------ usage
    usage = getattr(response, "usage", None)
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    if usage is not None:
        input_tokens = int(getattr(usage, "prompt_tokens", 0) or 0)
        output_tokens = int(getattr(usage, "completion_tokens", 0) or 0)
        # The attribute may exist but be ``None``/``0``; in that case derive
        # the total from the parts instead of reporting zero tokens.
        total_tokens = int(
            getattr(usage, "total_tokens", None) or (input_tokens + output_tokens)
        )

    token_usage = TokenUsage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=total_tokens,
    )

    # ---------------------------------------------------------------- model
    model_name: str = getattr(response, "model", None) or "unknown"
    model_info = ModelInfo(system=GenAISystem.GROQ, name=model_name)

    # ----------------------------------------------------------------- cost
    # Cost is priced from the prompt/completion counts; unknown models
    # are handled inside ``_compute_cost``.
    cost = _compute_cost(model_name, input_tokens, output_tokens)

    return token_usage, model_info, cost
330
+
331
+
332
def get_duration_ms(response: Any) -> float | None:  # noqa: ANN401
    """Convert a Groq response's ``usage.total_time`` to milliseconds.

    ``usage.total_time`` is read as a duration in seconds and multiplied by
    1000.

    Args:
        response: A Groq ``ChatCompletion`` (or compatible object).

    Returns:
        The duration in milliseconds, or ``None`` when ``usage`` or
        ``usage.total_time`` is missing/``None`` or the value cannot be
        converted to ``float``.
    """
    # ``getattr(None, ..., None)`` yields ``None``, so a missing ``usage``
    # and a missing ``total_time`` collapse into the same check.
    seconds = getattr(getattr(response, "usage", None), "total_time", None)
    if seconds is None:
        return None

    try:
        millis = float(seconds) * 1000.0
    except (TypeError, ValueError):
        return None
    return millis
354
+
355
+
356
def list_models() -> list[str]:
    """Return the model names of the Groq pricing table in sorted order."""
    # Iterating a dict yields its keys, so no explicit ``.keys()`` is needed.
    return sorted(GROQ_PRICING)
359
+
360
+
361
+ # ---------------------------------------------------------------------------
362
+ # Internal helpers
363
+ # ---------------------------------------------------------------------------
364
+
365
+
366
def _require_groq() -> Any:  # noqa: ANN401
    """Import the ``groq`` package and return the module object.

    Raises:
        ImportError: If ``groq`` cannot be imported; the message includes
            the install hint and the original error is chained as the cause.
    """
    try:
        import groq  # type: ignore[import-untyped]  # noqa: PLC0415
    except ImportError as exc:
        hint = (
            "The 'groq' package is required for spanforge Groq integration.\n"
            "Install it with: pip install 'spanforge[groq]'"
        )
        raise ImportError(hint) from exc
    return groq
377
+
378
+
379
def _get_pricing(model: str) -> dict[str, float] | None:
    """Look up the pricing entry for *model*.

    An exact match in ``GROQ_PRICING`` wins.  Otherwise up to three trailing
    ``-``-separated segments (e.g. date or version suffixes) are dropped one
    at a time, longest prefix first, and the first prefix found in the table
    is used.

    Returns:
        The matching pricing entry, or ``None`` if nothing matches.
    """
    if model in GROQ_PRICING:
        return GROQ_PRICING[model]

    # At most four pieces: the leading remainder plus three trailing segments.
    segments = model.rsplit("-", 3)
    while len(segments) > 1:
        segments.pop()  # drop one more trailing segment
        prefix = "-".join(segments)
        if prefix in GROQ_PRICING:
            return GROQ_PRICING[prefix]

    return None
392
+
393
+
394
def _compute_cost(
    model_name: str,
    input_tokens: int,
    output_tokens: int,
) -> CostBreakdown:
    """Build a :class:`~spanforge.namespaces.trace.CostBreakdown` for a call.

    Prices come from :func:`_get_pricing` and are expressed per million
    tokens.  When the model has no pricing entry, ``CostBreakdown.zero()``
    is returned instead.
    """
    rates = _get_pricing(model_name)
    if rates is None:
        return CostBreakdown.zero()

    per_million = 1_000_000.0
    prompt_usd = input_tokens * rates["input"] / per_million
    completion_usd = output_tokens * rates["output"] / per_million

    return CostBreakdown(
        input_cost_usd=prompt_usd,
        output_cost_usd=completion_usd,
        total_cost_usd=prompt_usd + completion_usd,
        pricing_date=PRICING_DATE,
    )
414
+
415
+
416
def _auto_populate_span(response: Any) -> None:  # noqa: ANN401
    """Copy token usage, cost and model name from *response* onto the active span.

    The span on top of the stack returned by ``spanforge._span._span_stack()``
    is updated.  This is strictly best-effort and silently does nothing if:

    * The span stack is empty (no active span).
    * The span already has ``token_usage`` set (manual data is not overwritten).
    * Anything raises along the way, e.g. ``normalize_response`` on a
      malformed response.

    ``span.model`` is only filled in when it is still ``None``.
    """
    try:
        from spanforge._span import _span_stack  # noqa: PLC0415

        active = _span_stack()
        current = active[-1] if active else None
        if current is None or current.token_usage is not None:
            return

        usage, info, cost = normalize_response(response)
        current.token_usage = usage
        current.cost = cost

        if current.model is None:
            current.model = info.name

    except Exception:  # noqa: S110  # NOSONAR
        # Instrumentation must never break the caller's Groq request.
        pass