splunk-otel-util-genai 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opentelemetry/util/genai/__init__.py +17 -0
- opentelemetry/util/genai/_fsspec_upload/__init__.py +39 -0
- opentelemetry/util/genai/_fsspec_upload/fsspec_hook.py +184 -0
- opentelemetry/util/genai/attributes.py +60 -0
- opentelemetry/util/genai/callbacks.py +24 -0
- opentelemetry/util/genai/config.py +184 -0
- opentelemetry/util/genai/debug.py +183 -0
- opentelemetry/util/genai/emitters/__init__.py +25 -0
- opentelemetry/util/genai/emitters/composite.py +186 -0
- opentelemetry/util/genai/emitters/configuration.py +324 -0
- opentelemetry/util/genai/emitters/content_events.py +153 -0
- opentelemetry/util/genai/emitters/evaluation.py +519 -0
- opentelemetry/util/genai/emitters/metrics.py +308 -0
- opentelemetry/util/genai/emitters/span.py +774 -0
- opentelemetry/util/genai/emitters/spec.py +48 -0
- opentelemetry/util/genai/emitters/utils.py +961 -0
- opentelemetry/util/genai/environment_variables.py +200 -0
- opentelemetry/util/genai/handler.py +1002 -0
- opentelemetry/util/genai/instruments.py +44 -0
- opentelemetry/util/genai/interfaces.py +58 -0
- opentelemetry/util/genai/plugins.py +114 -0
- opentelemetry/util/genai/span_context.py +80 -0
- opentelemetry/util/genai/types.py +440 -0
- opentelemetry/util/genai/upload_hook.py +119 -0
- opentelemetry/util/genai/utils.py +182 -0
- opentelemetry/util/genai/version.py +15 -0
- splunk_otel_util_genai-0.1.3.dist-info/METADATA +70 -0
- splunk_otel_util_genai-0.1.3.dist-info/RECORD +31 -0
- splunk_otel_util_genai-0.1.3.dist-info/WHEEL +4 -0
- splunk_otel_util_genai-0.1.3.dist-info/entry_points.txt +5 -0
- splunk_otel_util_genai-0.1.3.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright The OpenTelemetry Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pkgutil import extend_path
|
|
16
|
+
|
|
17
|
+
__path__ = extend_path(__path__, __name__)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright The OpenTelemetry Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from os import environ
|
|
18
|
+
|
|
19
|
+
from opentelemetry.util.genai.environment_variables import (
|
|
20
|
+
OTEL_INSTRUMENTATION_GENAI_UPLOAD_BASE_PATH,
|
|
21
|
+
)
|
|
22
|
+
from opentelemetry.util.genai.upload_hook import UploadHook, _NoOpUploadHook
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def fsspec_upload_hook() -> UploadHook:
    """Build the fsspec-backed upload hook, falling back to a no-op hook.

    A no-op hook is returned when the fsspec hook module cannot be imported
    (for example because ``fsspec`` is not installed), or when the
    ``OTEL_INSTRUMENTATION_GENAI_UPLOAD_BASE_PATH`` environment variable is
    unset or empty.
    """
    try:
        # Imported lazily so that a missing optional dependency only
        # disables uploads instead of breaking the import of this package.
        # pylint: disable=import-outside-toplevel
        from opentelemetry.util.genai._fsspec_upload.fsspec_hook import (
            FsspecUploadHook,
        )
    except ImportError:
        return _NoOpUploadHook()

    upload_root = environ.get(OTEL_INSTRUMENTATION_GENAI_UPLOAD_BASE_PATH)
    if upload_root:
        return FsspecUploadHook(base_path=upload_root)
    return _NoOpUploadHook()
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# Copyright The OpenTelemetry Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import posixpath
|
|
21
|
+
import threading
|
|
22
|
+
from concurrent.futures import Future, ThreadPoolExecutor
|
|
23
|
+
from dataclasses import asdict, dataclass
|
|
24
|
+
from functools import partial
|
|
25
|
+
from typing import Any, Callable, Literal, TextIO, cast
|
|
26
|
+
from uuid import uuid4
|
|
27
|
+
|
|
28
|
+
import fsspec
|
|
29
|
+
|
|
30
|
+
from opentelemetry._logs import LogRecord
|
|
31
|
+
from opentelemetry.trace import Span
|
|
32
|
+
from opentelemetry.util.genai import types
|
|
33
|
+
from opentelemetry.util.genai.upload_hook import UploadHook
|
|
34
|
+
|
|
35
|
+
_logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class Completion:
    """The three message collections that make up one completion."""

    # Messages supplied as input.
    inputs: list[types.InputMessage]
    # Messages produced as output.
    outputs: list[types.OutputMessage]
    # Parts making up the system instruction.
    system_instruction: list[types.MessagePart]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class CompletionRefs:
    """Reference strings, one per part of a completion."""

    # Reference for the input messages.
    inputs_ref: str
    # Reference for the output messages.
    outputs_ref: str
    # Reference for the system instruction.
    system_instruction_ref: str
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
JsonEncodeable = list[dict[str, Any]]
|
|
53
|
+
|
|
54
|
+
# mapping of upload path to function computing upload data dict
|
|
55
|
+
UploadData = dict[str, Callable[[], JsonEncodeable]]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def fsspec_open(urlpath: str, mode: Literal["w"]) -> TextIO:
    """Call ``fsspec.open`` and present its result to type checkers as ``TextIO``."""
    opened = fsspec.open(urlpath, mode)  # pyright: ignore[reportUnknownMemberType]
    # ``cast`` performs no runtime conversion; the object is returned as-is.
    return cast(TextIO, opened)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class FsspecUploadHook(UploadHook):
    """An upload hook using ``fsspec`` to upload to external storage

    This class can be used as the
    :func:`~opentelemetry.util.genai.upload_hook.load_upload_hook` implementation by
    setting :envvar:`OTEL_INSTRUMENTATION_GENAI_UPLOAD_HOOK` to ``fsspec``.
    :envvar:`OTEL_INSTRUMENTATION_GENAI_UPLOAD_BASE_PATH` must be configured to specify the
    base path for uploads.

    Both the ``fsspec`` and ``opentelemetry-sdk`` packages should be installed, or a no-op
    implementation will be used instead. You can use ``opentelemetry-util-genai[fsspec]``
    as a requirement to achieve this.
    """

    def __init__(
        self,
        *,
        base_path: str,
        max_size: int = 20,
    ) -> None:
        """Create the hook.

        Args:
            base_path: Path prefix that every uploaded file name is joined to.
            max_size: Number of worker threads and, at the same time, the
                maximum number of uploads that may be pending at once.
        """
        self._base_path = base_path
        self._max_size = max_size

        # Use a ThreadPoolExecutor for its queueing and thread management. The semaphore
        # limits the number of queued tasks. If the queue is full, data will be dropped.
        self._executor = ThreadPoolExecutor(max_workers=max_size)
        self._semaphore = threading.BoundedSemaphore(max_size)

    def _submit_all(self, upload_data: UploadData) -> None:
        """Queue one background upload per entry of ``upload_data``.

        Entries are dropped (with a warning) when no semaphore slot is free.
        Submission stops at the first entry the executor rejects because it
        has already been shut down.
        """

        def done(future: Future[None]) -> None:
            # Free the slot first so a failing upload never blocks later ones.
            self._semaphore.release()

            try:
                future.result()
            except Exception:  # pylint: disable=broad-except
                _logger.exception("fsspec uploader failed")

        for path, json_encodeable in upload_data.items():
            # could not acquire, drop data
            if not self._semaphore.acquire(blocking=False):  # pylint: disable=consider-using-with
                _logger.warning(
                    "fsspec upload queue is full, dropping upload %s",
                    path,
                )
                continue

            try:
                fut = self._executor.submit(
                    self._do_upload, path, json_encodeable
                )
            except RuntimeError:
                # Nothing was queued, so ``done`` will never run for this
                # entry; give the slot back here to keep the semaphore
                # count balanced.
                self._semaphore.release()
                _logger.info(
                    "attempting to upload file after FsspecUploadHook.shutdown() was already called"
                )
                break

            # Attached outside the ``try`` so that an already-finished future,
            # whose callback runs immediately, cannot lead to a second release.
            fut.add_done_callback(done)

    def _calculate_ref_path(self) -> CompletionRefs:
        """Return three upload paths under the base path sharing one fresh UUID."""
        # TODO: experimental with using the trace_id and span_id, or fetching
        # gen_ai.response.id from the active span.

        uuid_str = str(uuid4())
        return CompletionRefs(
            inputs_ref=posixpath.join(
                self._base_path, f"{uuid_str}_inputs.json"
            ),
            outputs_ref=posixpath.join(
                self._base_path, f"{uuid_str}_outputs.json"
            ),
            system_instruction_ref=posixpath.join(
                self._base_path, f"{uuid_str}_system_instruction.json"
            ),
        )

    @staticmethod
    def _do_upload(
        path: str, json_encodeable: Callable[[], JsonEncodeable]
    ) -> None:
        """Open ``path`` for writing and dump the computed payload as compact JSON."""
        with fsspec_open(path, "w") as file:
            # The payload is computed here, i.e. on the thread doing the upload.
            json.dump(json_encodeable(), file, separators=(",", ":"))

    def upload(
        self,
        *,
        inputs: list[types.InputMessage],
        outputs: list[types.OutputMessage],
        system_instruction: list[types.MessagePart],
        span: Span | None = None,
        log_record: LogRecord | None = None,
        **kwargs: Any,
    ) -> None:
        """Schedule uploads of the inputs, outputs and system instruction.

        Each of the three collections is written to its own JSON file.
        ``span``, ``log_record`` and ``kwargs`` are accepted but not used by
        this implementation yet.
        """
        completion = Completion(
            inputs=inputs,
            outputs=outputs,
            system_instruction=system_instruction,
        )
        # generate the paths to upload to
        ref_names = self._calculate_ref_path()

        def to_dict(
            dataclass_list: list[types.InputMessage]
            | list[types.OutputMessage]
            | list[types.MessagePart],
        ) -> JsonEncodeable:
            return [asdict(dc) for dc in dataclass_list]

        self._submit_all(
            {
                # Use partial to defer as much as possible to the background threads
                ref_names.inputs_ref: partial(to_dict, completion.inputs),
                ref_names.outputs_ref: partial(to_dict, completion.outputs),
                ref_names.system_instruction_ref: partial(
                    to_dict, completion.system_instruction
                ),
            },
        )

        # TODO: stamp the refs on telemetry

    def shutdown(self) -> None:
        """Shut down the executor; uploads attempted afterwards are skipped."""
        # TODO: support timeout
        self._executor.shutdown()
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized constants for GenAI telemetry attribute names.
|
|
3
|
+
This module replaces inline string literals for span & event attributes.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
# --- Core GenAI span/event attribute names ---
GEN_AI_PROVIDER_NAME = "gen_ai.provider.name"
GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"
GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"
GEN_AI_FRAMEWORK = "gen_ai.framework"
GEN_AI_COMPLETION_PREFIX = "gen_ai.completion"

# --- Operation, model, usage and evaluation attribute names ---
GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
GEN_AI_RESPONSE_ID = "gen_ai.response.id"
GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
GEN_AI_EVALUATION_NAME = "gen_ai.evaluation.name"
GEN_AI_EVALUATION_SCORE_VALUE = "gen_ai.evaluation.score.value"
GEN_AI_EVALUATION_SCORE_LABEL = "gen_ai.evaluation.score.label"
GEN_AI_EVALUATION_EXPLANATION = "gen_ai.evaluation.explanation"
# Prefix only (note the trailing dot); a suffix is meant to be appended.
GEN_AI_EVALUATION_ATTRIBUTES_PREFIX = "gen_ai.evaluation.attributes."

# --- Agent attribute names (from semantic conventions) ---
GEN_AI_AGENT_NAME = "gen_ai.agent.name"
GEN_AI_AGENT_ID = "gen_ai.agent.id"
GEN_AI_AGENT_DESCRIPTION = "gen_ai.agent.description"
GEN_AI_AGENT_TOOLS = "gen_ai.agent.tools"
GEN_AI_AGENT_TYPE = "gen_ai.agent.type"
GEN_AI_AGENT_SYSTEM_INSTRUCTIONS = "gen_ai.agent.system_instructions"
GEN_AI_AGENT_INPUT_CONTEXT = "gen_ai.agent.input_context"
GEN_AI_AGENT_OUTPUT_RESULT = "gen_ai.agent.output_result"

# --- Workflow attribute names (not in semantic conventions) ---
GEN_AI_WORKFLOW_NAME = "gen_ai.workflow.name"
GEN_AI_WORKFLOW_TYPE = "gen_ai.workflow.type"
GEN_AI_WORKFLOW_DESCRIPTION = "gen_ai.workflow.description"
GEN_AI_WORKFLOW_INITIAL_INPUT = "gen_ai.workflow.initial_input"
GEN_AI_WORKFLOW_FINAL_OUTPUT = "gen_ai.workflow.final_output"

# --- Step attribute names (not in semantic conventions) ---
GEN_AI_STEP_NAME = "gen_ai.step.name"
GEN_AI_STEP_TYPE = "gen_ai.step.type"
GEN_AI_STEP_OBJECTIVE = "gen_ai.step.objective"
GEN_AI_STEP_SOURCE = "gen_ai.step.source"
GEN_AI_STEP_ASSIGNED_AGENT = "gen_ai.step.assigned_agent"
GEN_AI_STEP_STATUS = "gen_ai.step.status"
GEN_AI_STEP_INPUT_DATA = "gen_ai.step.input_data"
GEN_AI_STEP_OUTPUT_DATA = "gen_ai.step.output_data"

# --- Embedding attribute names ---
GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count"
GEN_AI_EMBEDDINGS_INPUT_TEXTS = "gen_ai.embeddings.input.texts"
GEN_AI_REQUEST_ENCODING_FORMATS = "gen_ai.request.encoding_formats"

# --- Server attribute names (from semantic conventions) ---
SERVER_ADDRESS = "server.address"
SERVER_PORT = "server.port"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from .types import GenAI
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CompletionCallback(Protocol):
    """Structural interface for handlers that want completion events."""

    def on_completion(self, invocation: GenAI) -> None:
        """Process a GenAI invocation that has completed."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NoOpCompletionCallback:
    """Completion callback whose handler does nothing."""

    def on_completion(
        self, invocation: GenAI
    ) -> None:  # pragma: no cover - trivial
        """Accept ``invocation`` and ignore it."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
__all__ = ["CompletionCallback", "NoOpCompletionCallback"]
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
from .emitters.spec import CategoryOverride
|
|
9
|
+
from .environment_variables import (
|
|
10
|
+
OTEL_GENAI_EVALUATION_EVENT_LEGACY,
|
|
11
|
+
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
|
|
12
|
+
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE,
|
|
13
|
+
OTEL_INSTRUMENTATION_GENAI_EMITTERS,
|
|
14
|
+
OTEL_INSTRUMENTATION_GENAI_EMITTERS_CONTENT_EVENTS,
|
|
15
|
+
OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION,
|
|
16
|
+
OTEL_INSTRUMENTATION_GENAI_EMITTERS_METRICS,
|
|
17
|
+
OTEL_INSTRUMENTATION_GENAI_EMITTERS_SPAN,
|
|
18
|
+
OTEL_INSTRUMENTATION_GENAI_EVALUATION_SAMPLE_RATE,
|
|
19
|
+
)
|
|
20
|
+
from .types import ContentCapturingMode
|
|
21
|
+
from .utils import get_content_capturing_mode
|
|
22
|
+
|
|
23
|
+
_logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class Settings:
    """Immutable emitter configuration derived from environment variables."""

    # Baseline switches for the span, metrics and content-event emitters.
    enable_span: bool
    enable_metrics: bool
    enable_content_events: bool
    # Emitter names requested in addition to the baseline.
    extra_emitters: list[str]
    # Flag for configurations that request only "traceloop_compat".
    only_traceloop_compat: bool
    # Emitter tokens as parsed from the environment.
    raw_tokens: list[str]
    # Message content capture mode and whether it was explicitly configured.
    capture_messages_mode: ContentCapturingMode
    capture_messages_override: bool
    legacy_capture_request: bool
    emit_legacy_evaluation_event: bool
    # Per-category emitter overrides, keyed by category name.
    category_overrides: Dict[str, CategoryOverride]
    evaluation_sample_rate: float = 1.0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def parse_env() -> Settings:
    """Parse emitter-related environment variables into structured settings.

    Returns:
        A ``Settings`` instance describing which baseline emitters are
        enabled, which extra emitters were requested, the content capture
        configuration, per-category overrides, and the evaluation sample
        rate, which is always a finite value within ``[0.0, 1.0]``.
    """
    # pylint: disable=import-outside-toplevel
    import math  # local so this function does not rely on a module-level import

    raw_val = os.environ.get(OTEL_INSTRUMENTATION_GENAI_EMITTERS, "span")
    tokens = [
        token.strip().lower() for token in raw_val.split(",") if token.strip()
    ]
    if not tokens:
        # The variable was set but contained no usable token.
        tokens = ["span"]

    # baseline name -> (enable_span, enable_metrics, enable_content_events)
    baseline_map = {
        "span": (True, False, False),
        "span_metric": (True, True, False),
        "span_metric_event": (True, True, True),
    }

    # The first recognised baseline token wins.
    baseline = next((token for token in tokens if token in baseline_map), None)
    extra_emitters: list[str] = []
    only_traceloop_compat = False

    if baseline is None:
        if tokens == ["traceloop_compat"]:
            baseline = "span"
            extra_emitters = ["traceloop_compat"]
            only_traceloop_compat = True
        else:
            baseline = "span"
            extra_emitters = [
                token for token in tokens if token not in baseline_map
            ]
    else:
        extra_emitters = [token for token in tokens if token != baseline]

    enable_span, enable_metrics, enable_content_events = baseline_map.get(
        baseline, (True, False, False)
    )

    # True as soon as either capture variable is present, whatever its value.
    capture_messages_override = any(
        env is not None
        for env in (
            os.environ.get(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT),
            os.environ.get(
                OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE
            ),
        )
    )
    capture_mode = get_content_capturing_mode()

    # Legacy flag removed: always False now
    legacy_capture_request = False

    overrides: Dict[str, CategoryOverride] = {}
    override_env_map = {
        "span": os.environ.get(OTEL_INSTRUMENTATION_GENAI_EMITTERS_SPAN, ""),
        "metrics": os.environ.get(
            OTEL_INSTRUMENTATION_GENAI_EMITTERS_METRICS, ""
        ),
        "content_events": os.environ.get(
            OTEL_INSTRUMENTATION_GENAI_EMITTERS_CONTENT_EVENTS, ""
        ),
        "evaluation": os.environ.get(
            OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION, ""
        ),
    }
    for category, raw in override_env_map.items():
        override = _parse_category_override(category, raw)
        if override is not None:
            overrides[category] = override

    legacy_event_flag = os.environ.get(
        OTEL_GENAI_EVALUATION_EVENT_LEGACY, ""
    ).strip()
    emit_legacy_event = legacy_event_flag.lower() in {"1", "true", "yes"}

    raw_sample_rate = os.environ.get(
        OTEL_INSTRUMENTATION_GENAI_EVALUATION_SAMPLE_RATE
    )
    evaluation_sample_rate: float = 1.0
    if raw_sample_rate is not None and raw_sample_rate.strip() != "":
        try:
            evaluation_sample_rate = float(raw_sample_rate)
        except ValueError:
            evaluation_sample_rate = 1.0
    # ``float("nan")`` parses without error and compares False against both
    # bounds, so it would slip through the clamp; treat it like any other
    # invalid value.
    if math.isnan(evaluation_sample_rate):
        evaluation_sample_rate = 1.0
    evaluation_sample_rate = min(1.0, max(0.0, evaluation_sample_rate))

    return Settings(
        enable_span=enable_span,
        enable_metrics=enable_metrics,
        enable_content_events=enable_content_events,
        extra_emitters=extra_emitters,
        only_traceloop_compat=only_traceloop_compat,
        raw_tokens=tokens,
        capture_messages_mode=capture_mode,
        capture_messages_override=capture_messages_override,
        legacy_capture_request=legacy_capture_request,
        emit_legacy_evaluation_event=emit_legacy_event,
        category_overrides=overrides,
        evaluation_sample_rate=evaluation_sample_rate,
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _parse_category_override(
|
|
149
|
+
category: str, raw: str
|
|
150
|
+
) -> CategoryOverride | None: # pragma: no cover - thin parsing
|
|
151
|
+
if not raw:
|
|
152
|
+
return None
|
|
153
|
+
text = raw.strip()
|
|
154
|
+
if not text:
|
|
155
|
+
return None
|
|
156
|
+
directive = None
|
|
157
|
+
remainder = text
|
|
158
|
+
if ":" in text:
|
|
159
|
+
prefix, remainder = text.split(":", 1)
|
|
160
|
+
directive = prefix.strip().lower()
|
|
161
|
+
names = [name.strip() for name in remainder.split(",") if name.strip()]
|
|
162
|
+
mode_map = {
|
|
163
|
+
None: "append",
|
|
164
|
+
"append": "append",
|
|
165
|
+
"prepend": "prepend",
|
|
166
|
+
"replace": "replace-category",
|
|
167
|
+
"replace-category": "replace-category",
|
|
168
|
+
"replace-same-name": "replace-same-name",
|
|
169
|
+
}
|
|
170
|
+
mode = mode_map.get(directive)
|
|
171
|
+
if mode is None:
|
|
172
|
+
if directive:
|
|
173
|
+
_logger.warning(
|
|
174
|
+
"Unknown emitter override directive '%s' for category '%s'",
|
|
175
|
+
directive,
|
|
176
|
+
category,
|
|
177
|
+
)
|
|
178
|
+
mode = "append"
|
|
179
|
+
if mode != "replace-category" and not names:
|
|
180
|
+
return None
|
|
181
|
+
return CategoryOverride(mode=mode, emitter_names=tuple(names))
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
__all__ = ["Settings", "parse_env"]
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Opt-in debug logging utilities for GenAI telemetry types.
|
|
2
|
+
|
|
3
|
+
The debug facility is disabled by default and activated when either
|
|
4
|
+
`OTEL_GENAI_DEBUG` or `OTEL_INSTRUMENTATION_GENAI_DEBUG` environment variable
|
|
5
|
+
is set to a truthy value (case-insensitive one of: 1, true, yes, on, debug).
|
|
6
|
+
|
|
7
|
+
Usage pattern (internal):
|
|
8
|
+
|
|
9
|
+
from opentelemetry.util.genai.debug import genai_debug_log
|
|
10
|
+
genai_debug_log("handler.start_llm.begin", invocation)
|
|
11
|
+
|
|
12
|
+
The helper auto-formats an object representation including span context IDs
|
|
13
|
+
when available.
|
|
14
|
+
|
|
15
|
+
This module intentionally avoids heavy imports and large content dumps.
|
|
16
|
+
Message bodies are NOT logged; counts only.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from .span_context import (
|
|
26
|
+
extract_span_context,
|
|
27
|
+
span_context_hex_ids,
|
|
28
|
+
store_span_context,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
try: # Local import guarded for namespace package variations
|
|
32
|
+
from opentelemetry.util.genai.types import GenAI # type: ignore
|
|
33
|
+
except Exception: # pragma: no cover - fallback for edge import errors
|
|
34
|
+
GenAI = object # type: ignore
|
|
35
|
+
|
|
36
|
+
_TRUTHY = {"1", "true", "yes", "on", "debug"}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read_enabled_flag() -> bool:
|
|
40
|
+
for var in ("OTEL_GENAI_DEBUG", "OTEL_INSTRUMENTATION_GENAI_DEBUG"):
|
|
41
|
+
raw = os.environ.get(var)
|
|
42
|
+
if raw is None:
|
|
43
|
+
continue
|
|
44
|
+
if raw.strip().lower() in _TRUTHY:
|
|
45
|
+
return True
|
|
46
|
+
return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
_ENABLED = _read_enabled_flag()
|
|
50
|
+
|
|
51
|
+
_LOGGER = logging.getLogger("opentelemetry.util.genai.debug")
|
|
52
|
+
if _ENABLED and not _LOGGER.handlers: # configure minimal handler if none
|
|
53
|
+
handler = logging.StreamHandler()
|
|
54
|
+
fmt = logging.Formatter("%(message)s") # raw message only (no ts/prefix)
|
|
55
|
+
handler.setFormatter(fmt)
|
|
56
|
+
_LOGGER.addHandler(handler)
|
|
57
|
+
_LOGGER.setLevel(logging.DEBUG)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def is_enabled() -> bool:
    """Report the GenAI debug-logging flag computed when this module was imported."""
    return _ENABLED
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _hex_trace(span_context: Any) -> str | None:
|
|
67
|
+
try:
|
|
68
|
+
if span_context and getattr(span_context, "is_valid", False):
|
|
69
|
+
return f"{span_context.trace_id:032x}" # type: ignore[attr-defined]
|
|
70
|
+
except Exception: # pragma: no cover
|
|
71
|
+
return None
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _hex_span(span_context: Any) -> str | None:
|
|
76
|
+
try:
|
|
77
|
+
if span_context and getattr(span_context, "is_valid", False):
|
|
78
|
+
return f"{span_context.span_id:016x}" # type: ignore[attr-defined]
|
|
79
|
+
except Exception: # pragma: no cover
|
|
80
|
+
return None
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def summarize_genai(obj: Any) -> str:
    """Build a compact ``<Class key=value ...>`` description of ``obj``.

    Only identifiers, trace/span ids, token counts and message counts are
    included; message contents are never rendered. ``None`` is described as
    ``"<None>"``.
    """
    if obj is None:
        return "<None>"

    pieces: list[str] = [obj.__class__.__name__]

    # Identifiers are shown whenever they are set, even if falsy.
    for label in ("run_id", "parent_run_id"):
        ident = getattr(obj, label, None)
        if ident is not None:
            pieces.append(f"{label}={ident}")

    provider = getattr(obj, "provider", None)
    if provider:
        pieces.append(f"provider={provider}")

    model = getattr(obj, "request_model", None) or getattr(obj, "model", None)
    if model:
        pieces.append(f"model={model}")

    # Prefer a span context already present on the object; otherwise derive
    # one from its span and store it back on the object.
    span = getattr(obj, "span", None)
    context = getattr(obj, "span_context", None)
    if context is None and span is not None:
        try:
            context = extract_span_context(span)
        except Exception:  # pragma: no cover
            context = None
        else:
            store_span_context(obj, context)

    trace_hex, span_hex = span_context_hex_ids(context)
    if not trace_hex:
        # Fall back to a non-zero integer ``trace_id`` attribute.
        raw_trace = getattr(obj, "trace_id", None)
        if isinstance(raw_trace, int) and raw_trace:
            trace_hex = f"{raw_trace:032x}"
    if not span_hex:
        # Fall back to a non-zero integer ``span_id`` attribute.
        raw_span = getattr(obj, "span_id", None)
        if isinstance(raw_span, int) and raw_span:
            span_hex = f"{raw_span:016x}"
    if trace_hex:
        pieces.append(f"trace_id={trace_hex}")
    if span_hex:
        pieces.append(f"span_id={span_hex}")

    # Token counts, only when numeric.
    for label in ("input_tokens", "output_tokens"):
        count = getattr(obj, label, None)
        if isinstance(count, (int, float)):
            pieces.append(f"{label}={count}")

    # Message lists are reported by length only.
    for label in ("input_messages", "output_messages"):
        messages = getattr(obj, label, None)
        if isinstance(messages, list):
            pieces.append(f"{label}={len(messages)}")

    return f"<{' '.join(pieces)}>"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def genai_debug_log(event: str, obj: Any = None, **info: Any) -> None:
    """Emit one space-separated debug line when debug logging is enabled.

    Parameters
    ----------
    event : str
        Event key/path (e.g., 'handler.start_llm.begin').
    obj : GenAI | None
        Optional related object; its class name and a short summary are
        included in the line.
    **info : Any
        Extra key-value pairs. ``None`` values are skipped; lists, tuples
        and sets are reported as ``<key>_count=<len>``; anything else as
        ``<key>=<value>``.
    """
    if not _ENABLED:
        return

    has_obj = obj is not None
    tokens: list[str] = ["GENAIDEBUG", f"event={event}"]
    if has_obj:
        tokens.append(f"class={obj.__class__.__name__}")

    for key, value in info.items():
        if value is None:
            continue
        try:
            token = (
                f"{key}_count={len(value)}"
                if isinstance(value, (list, tuple, set))
                else f"{key}={value}"
            )
        except Exception:  # pragma: no cover
            # A value that cannot be rendered is left out of the line.
            continue
        tokens.append(token)

    # The object summary goes last, after the key-value pairs, for readability.
    if has_obj:
        tokens.append("repr=" + summarize_genai(obj))
    _LOGGER.debug(" ".join(tokens))
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
__all__ = [
|
|
180
|
+
"is_enabled",
|
|
181
|
+
"genai_debug_log",
|
|
182
|
+
"summarize_genai",
|
|
183
|
+
]
|