spanforge 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""spanforge.integrations.groq — Auto-instrumentation for the Groq Python SDK.
|
|
2
|
+
|
|
3
|
+
This module monkey-patches the Groq client so every
|
|
4
|
+
``client.chat.completions.create(...)`` call automatically populates the
|
|
5
|
+
active :class:`~spanforge._span.Span` with:
|
|
6
|
+
|
|
7
|
+
* :class:`~spanforge.namespaces.trace.TokenUsage` (input / output token counts)
|
|
8
|
+
* :class:`~spanforge.namespaces.trace.ModelInfo` (provider = ``groq``, name
|
|
9
|
+
from response)
|
|
10
|
+
* :class:`~spanforge.namespaces.trace.CostBreakdown` (computed from the static
|
|
11
|
+
pricing table below)
|
|
12
|
+
|
|
13
|
+
The Groq SDK mirrors the OpenAI API surface, so the response object has the
|
|
14
|
+
same ``usage.prompt_tokens`` / ``usage.completion_tokens`` fields.
|
|
15
|
+
Additionally, Groq exposes per-request timing via ``usage.total_time``
|
|
16
|
+
(seconds). Use :func:`get_duration_ms` to extract the API-measured latency
|
|
17
|
+
from a response object.
|
|
18
|
+
|
|
19
|
+
Usage::
|
|
20
|
+
|
|
21
|
+
from spanforge.integrations import groq as groq_integration
|
|
22
|
+
groq_integration.patch()
|
|
23
|
+
|
|
24
|
+
from groq import Groq
|
|
25
|
+
client = Groq()
|
|
26
|
+
|
|
27
|
+
import spanforge
|
|
28
|
+
spanforge.configure(exporter="console")
|
|
29
|
+
|
|
30
|
+
with spanforge.span("groq-chat", model="llama3-70b-8192") as span:
|
|
31
|
+
resp = client.chat.completions.create(
|
|
32
|
+
model="llama3-70b-8192",
|
|
33
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
34
|
+
)
|
|
35
|
+
# → span.token_usage and span.cost auto-populated on exit
|
|
36
|
+
|
|
37
|
+
Calling ``patch()`` is **idempotent** — calling it multiple times has no
|
|
38
|
+
effect. Call :func:`unpatch` to restore the original methods.
|
|
39
|
+
|
|
40
|
+
Install with::
|
|
41
|
+
|
|
42
|
+
pip install "spanforge[groq]"
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
from __future__ import annotations
|
|
46
|
+
|
|
47
|
+
import functools
|
|
48
|
+
from typing import Any
|
|
49
|
+
|
|
50
|
+
from spanforge.namespaces.trace import (
|
|
51
|
+
CostBreakdown,
|
|
52
|
+
GenAISystem,
|
|
53
|
+
ModelInfo,
|
|
54
|
+
TokenUsage,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# NOTE: ``list_models`` is a documented public helper (no leading underscore,
# public docstring) and is now exported alongside the other public names.
__all__ = [
    "get_duration_ms",
    "is_patched",
    "list_models",
    "normalize_response",
    "patch",
    "unpatch",
]
|
|
64
|
+
|
|
65
|
+
# ---------------------------------------------------------------------------
# Static pricing table (USD per million tokens, effective 2026-03-04)
# ---------------------------------------------------------------------------

# Date the figures below were last checked against Groq's published price
# list; surfaced on every CostBreakdown via ``pricing_date``.
PRICING_DATE: str = "2026-03-04"

#: Groq model pricing — USD per million tokens.
# Keys are model identifiers as they appear in ``response.model``; each value
# maps "input"/"output" to the per-million-token rate in USD.
GROQ_PRICING: dict[str, dict[str, float]] = {
    # ------------------------------------------------------------------
    # LLaMA 3.3
    # ------------------------------------------------------------------
    "llama-3.3-70b-versatile": {
        "input": 0.59,
        "output": 0.79,
    },
    "llama-3.3-70b-specdec": {
        "input": 0.59,
        "output": 0.99,
    },
    # ------------------------------------------------------------------
    # LLaMA 3.1
    # ------------------------------------------------------------------
    "llama-3.1-70b-versatile": {
        "input": 0.59,
        "output": 0.79,
    },
    "llama-3.1-8b-instant": {
        "input": 0.05,
        "output": 0.08,
    },
    "llama-3.1-405b-reasoning": {
        "input": 3.00,
        "output": 3.00,
    },
    # ------------------------------------------------------------------
    # LLaMA 3.2
    # ------------------------------------------------------------------
    "llama-3.2-1b-preview": {
        "input": 0.04,
        "output": 0.04,
    },
    "llama-3.2-3b-preview": {
        "input": 0.06,
        "output": 0.06,
    },
    "llama-3.2-11b-vision-preview": {
        "input": 0.18,
        "output": 0.18,
    },
    "llama-3.2-90b-vision-preview": {
        "input": 0.90,
        "output": 0.90,
    },
    # ------------------------------------------------------------------
    # LLaMA 3 (legacy names)
    # ------------------------------------------------------------------
    "llama3-70b-8192": {
        "input": 0.59,
        "output": 0.79,
    },
    "llama3-8b-8192": {
        "input": 0.05,
        "output": 0.08,
    },
    "llama3-groq-70b-8192-tool-use-preview": {
        "input": 0.89,
        "output": 0.89,
    },
    "llama3-groq-8b-8192-tool-use-preview": {
        "input": 0.19,
        "output": 0.19,
    },
    # ------------------------------------------------------------------
    # Mixtral
    # ------------------------------------------------------------------
    "mixtral-8x7b-32768": {
        "input": 0.24,
        "output": 0.24,
    },
    # ------------------------------------------------------------------
    # Gemma
    # ------------------------------------------------------------------
    "gemma-7b-it": {
        "input": 0.07,
        "output": 0.07,
    },
    "gemma2-9b-it": {
        "input": 0.20,
        "output": 0.20,
    },
}

# Sentinel attribute set on the groq module to prevent double-patching.
_PATCH_FLAG = "_spanforge_patched"
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Public API
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def patch() -> None:
    """Monkey-patch the Groq client to auto-instrument all chat completions.

    Wraps both ``groq.resources.chat.completions.Completions.create``
    (sync) and ``AsyncCompletions.create`` (async). The wrapper calls
    :func:`normalize_response` on the result and, if a span is currently
    active on this thread, updates it.

    This function is **idempotent** — safe to call multiple times.

    Raises:
        ImportError: If the ``groq`` package is not installed.
    """
    groq_mod = _require_groq()

    if getattr(groq_mod, _PATCH_FLAG, False):
        return  # already patched

    # --- sync ----------------------------------------------------------------
    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            Completions,  # type: ignore[import-untyped]
        )

        _orig_sync = Completions.create  # type: ignore[attr-defined]

        @functools.wraps(_orig_sync)
        def _patched_sync(self: Any, *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
            response = _orig_sync(self, *args, **kwargs)
            _auto_populate_span(response)
            return response

        Completions.create = _patched_sync  # type: ignore[method-assign]
        # Stash the original so unpatch() can restore it.
        Completions._spanforge_orig_create = _orig_sync  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        pass

    # --- async ---------------------------------------------------------------
    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            AsyncCompletions,  # type: ignore[import-untyped]
        )

        _orig_async = AsyncCompletions.create  # type: ignore[attr-defined]

        @functools.wraps(_orig_async)
        async def _patched_async(self: Any, *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
            response = await _orig_async(self, *args, **kwargs)
            _auto_populate_span(response)
            return response

        AsyncCompletions.create = _patched_async  # type: ignore[method-assign]
        # Stash the original so unpatch() can restore it.
        AsyncCompletions._spanforge_orig_create = _orig_async  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        pass

    # Use the shared _PATCH_FLAG constant (instead of a hard-coded attribute
    # name) so the sentinel stays in sync with the getattr checks above and
    # in unpatch()/is_patched().
    setattr(groq_mod, _PATCH_FLAG, True)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def unpatch() -> None:
    """Restore the original Groq methods and remove the patch flag.

    Safe to call even if :func:`patch` was never called.

    Raises:
        ImportError: If the ``groq`` package is not installed.
    """
    groq_mod = _require_groq()

    if not getattr(groq_mod, _PATCH_FLAG, False):
        return  # nothing to do

    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            Completions,  # type: ignore[import-untyped]
        )

        Completions.create = Completions._spanforge_orig_create  # type: ignore[attr-defined,method-assign]
        del Completions._spanforge_orig_create  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        pass

    try:
        from groq.resources.chat.completions import (  # noqa: PLC0415
            AsyncCompletions,  # type: ignore[import-untyped]
        )

        AsyncCompletions.create = AsyncCompletions._spanforge_orig_create  # type: ignore[attr-defined,method-assign]
        del AsyncCompletions._spanforge_orig_create  # type: ignore[attr-defined]
    except (ImportError, AttributeError):  # pragma: no cover
        pass

    # Use the shared _PATCH_FLAG constant (instead of a hard-coded attribute
    # name) so the sentinel removal stays in sync with patch()/is_patched().
    try:  # noqa: SIM105
        delattr(groq_mod, _PATCH_FLAG)
    except AttributeError:  # pragma: no cover
        pass
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def is_patched() -> bool:
    """Report whether spanforge has patched the Groq client.

    Returns ``False`` if the ``groq`` package is not installed.
    """
    try:
        module = _require_groq()
    except ImportError:
        return False
    return bool(getattr(module, _PATCH_FLAG, False))
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def normalize_response(
    response: Any,  # noqa: ANN401
) -> tuple[TokenUsage, ModelInfo, CostBreakdown]:
    """Extract structured observability data from a Groq chat completion.

    The Groq SDK mirrors the OpenAI response structure, so token fields
    follow the same ``prompt_tokens`` / ``completion_tokens`` naming.
    Groq additionally provides per-request timing in ``usage.total_time``
    (seconds); use :func:`get_duration_ms` to extract it separately.

    Args:
        response: A Groq ``ChatCompletion`` (or compatible object).

    Returns:
        A 3-tuple of ``(TokenUsage, ModelInfo, CostBreakdown)``.

    Field mapping:

    +--------------------------------------------+---------------------------+
    | Groq field                                 | SpanForge field           |
    +============================================+===========================+
    | ``response.model``                         | ``ModelInfo.name``        |
    | ``usage.prompt_tokens``                    | ``TokenUsage.input_tokens``|
    | ``usage.completion_tokens``                | ``TokenUsage.output_tokens``|
    | ``usage.total_tokens``                     | ``TokenUsage.total_tokens``|
    +--------------------------------------------+---------------------------+
    """
    # ------------------------------------------------------------------ usage
    usage = getattr(response, "usage", None)
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    if usage is not None:
        input_tokens = int(getattr(usage, "prompt_tokens", 0) or 0)
        output_tokens = int(getattr(usage, "completion_tokens", 0) or 0)
        # BUGFIX: the previous expression only used the computed sum when the
        # attribute was *missing*; a present-but-None ``total_tokens`` (seen
        # with some SDK/mock objects) collapsed to 0 via ``or 0``. Fall back
        # to input + output in both cases.
        total_tokens = int(
            getattr(usage, "total_tokens", None) or (input_tokens + output_tokens)
        )

    token_usage = TokenUsage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=total_tokens,
    )

    # ---------------------------------------------------------------- model
    model_name: str = getattr(response, "model", None) or "unknown"
    model_info = ModelInfo(system=GenAISystem.GROQ, name=model_name)

    # ----------------------------------------------------------------- cost
    cost = _compute_cost(model_name, input_tokens, output_tokens)

    return token_usage, model_info, cost
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def get_duration_ms(response: Any) -> float | None:  # noqa: ANN401
    """Return the API-measured processing time in milliseconds from a Groq response.

    Groq reports inference latency via ``usage.total_time`` (in seconds);
    this helper converts that value to milliseconds.

    Args:
        response: A Groq ``ChatCompletion`` (or compatible object).

    Returns:
        Processing time in milliseconds, or ``None`` if not available.
    """
    usage = getattr(response, "usage", None)
    seconds = None if usage is None else getattr(usage, "total_time", None)
    if seconds is None:
        return None
    try:
        millis = float(seconds) * 1000.0
    except (TypeError, ValueError):
        # Non-numeric total_time (e.g. a mock or malformed payload).
        return None
    return millis
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def list_models() -> list[str]:
    """Return every Groq model name in the pricing table, sorted alphabetically."""
    return sorted(GROQ_PRICING)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# ---------------------------------------------------------------------------
|
|
362
|
+
# Internal helpers
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _require_groq() -> Any: # noqa: ANN401
|
|
367
|
+
"""Import and return the ``groq`` module, raising ``ImportError`` if absent."""
|
|
368
|
+
try:
|
|
369
|
+
import groq # type: ignore[import-untyped] # noqa: PLC0415
|
|
370
|
+
except ImportError as exc:
|
|
371
|
+
raise ImportError(
|
|
372
|
+
"The 'groq' package is required for spanforge Groq integration.\n"
|
|
373
|
+
"Install it with: pip install 'spanforge[groq]'"
|
|
374
|
+
) from exc
|
|
375
|
+
else:
|
|
376
|
+
return groq
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _get_pricing(model: str) -> dict[str, float] | None:
    """Return the pricing entry for *model*, or ``None`` if unknown."""
    entry = GROQ_PRICING.get(model)
    if entry is not None:
        return entry

    # Unknown name: strip up to three trailing dash-separated segments
    # (date/version suffixes) and retry with each shorter prefix.
    segments = model.rsplit("-", 3)
    for cut in range(len(segments) - 1, 0, -1):
        prefix = "-".join(segments[:cut])
        match = GROQ_PRICING.get(prefix)
        if match is not None:
            return match

    return None
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _compute_cost(
    model_name: str,
    input_tokens: int,
    output_tokens: int,
) -> CostBreakdown:
    """Compute :class:`~spanforge.namespaces.trace.CostBreakdown` from token counts."""
    rates = _get_pricing(model_name)
    if rates is None:
        # Unknown model: report zero cost rather than guessing a rate.
        return CostBreakdown.zero()

    per_million = 1_000_000.0
    cost_in = input_tokens * rates["input"] / per_million
    cost_out = output_tokens * rates["output"] / per_million

    return CostBreakdown(
        input_cost_usd=cost_in,
        output_cost_usd=cost_out,
        total_cost_usd=cost_in + cost_out,
        pricing_date=PRICING_DATE,
    )
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _auto_populate_span(response: Any) -> None:  # noqa: ANN401
    """Populate the thread's active span (if any) from *response*.

    Silently does nothing if:

    * There is no active span.
    * ``normalize_response`` raises (malformed response).
    * The span already has ``token_usage`` set (don't overwrite manual data).
    """
    try:
        from spanforge._span import _span_stack  # noqa: PLC0415

        active = _span_stack()
        if not active:
            return
        current = active[-1]

        # Manually recorded usage takes precedence over auto-instrumentation.
        if current.token_usage is not None:
            return

        usage, model_info, cost = normalize_response(response)
        current.token_usage = usage
        current.cost = cost

        if current.model is None:
            current.model = model_info.name

    except Exception:  # noqa: S110 # NOSONAR
        # Best-effort instrumentation: telemetry must never break the API call.
        pass
|