splunk-otel-util-genai-translator-traceloop 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splunk_otel_util_genai_translator_traceloop-0.1.3/.gitignore +8 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/PKG-INFO +96 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/README.rst +68 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/pyproject.toml +60 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/__init__.py +20 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/content_normalizer.py +324 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/filtering_span_processor.py +161 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/message_reconstructor.py +219 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/traceloop_span_processor.py +1287 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/traceloop/__init__.py +237 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/version.py +2 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry_util_genai_traceloop_translator.pth +5 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_agent_task_message_reconstruction.py +442 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_args_wrapper_format.py +188 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_message_caching.py +453 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_message_serialization.py +148 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_nested_traceloop_reconstruction.py +246 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_real_traceloop_format.py +360 -0
- splunk_otel_util_genai_translator_traceloop-0.1.3/tests/test_traceloop_integration.py +773 -0

splunk_otel_util_genai_translator_traceloop-0.1.3/PKG-INFO
ADDED
@@ -0,0 +1,96 @@
Metadata-Version: 2.4
Name: splunk-otel-util-genai-translator-traceloop
Version: 0.1.3
Summary: Traceloop -> GenAI translator emitter for OpenTelemetry GenAI
Project-URL: Homepage, https://github.com/open-telemetry/opentelemetry-python-contrib
Project-URL: Repository, https://github.com/open-telemetry/opentelemetry-python-contrib
Author-email: OpenTelemetry Authors <cncf-opentelemetry-contributors@lists.cncf.io>
License-Expression: Apache-2.0
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Python: >=3.9
Requires-Dist: opentelemetry-api>=1.31.0
Requires-Dist: opentelemetry-instrumentation~=0.52b1
Requires-Dist: opentelemetry-sdk>=1.31.0
Requires-Dist: opentelemetry-semantic-conventions~=0.52b1
Requires-Dist: splunk-otel-util-genai>=0.1.3
Provides-Extra: test
Requires-Dist: pytest>=7.0.0; extra == 'test'
Description-Content-Type: text/x-rst

OpenTelemetry GenAI Traceloop Translator
========================================

This package automatically translates Traceloop-instrumented spans into OpenTelemetry GenAI semantic conventions.
It intercepts spans with ``traceloop.*`` attributes and creates corresponding spans with ``gen_ai.*`` attributes,
enabling seamless integration between Traceloop instrumentation and GenAI observability tools.

Mapping Table
-------------

============================== ================================
Traceloop Key                  Added Key
============================== ================================
``traceloop.workflow.name``    ``gen_ai.workflow.name``
``traceloop.entity.name``      ``gen_ai.agent.name``
``traceloop.entity.path``      ``gen_ai.workflow.path``
``traceloop.correlation.id``   ``gen_ai.conversation.id``
``traceloop.entity.input``     ``gen_ai.input.messages``
``traceloop.entity.output``    ``gen_ai.output.messages``
============================== ================================

Installation
------------

.. code-block:: bash

   pip install splunk-otel-util-genai-translator-traceloop

Quick Start (Automatic Registration)
------------------------------------

The translator registers itself automatically when the package is installed (via the bundled ``.pth`` hook); no manual setup or explicit import is required.

.. code-block:: python

   import os

   from openai import OpenAI
   from traceloop.sdk import Traceloop
   from traceloop.sdk.decorators import workflow

   client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

   Traceloop.init(app_name="story_service")


   @workflow(name="streaming_story")
   def joke_workflow():
       stream = client.chat.completions.create(
           model="gpt-4o-2024-05-13",
           messages=[{"role": "user", "content": "Tell me a story about opentelemetry"}],
           stream=True,
       )

       for part in stream:
           print(part.choices[0].delta.content or "", end="")
       print()


   joke_workflow()
   # The translator automatically creates new gen_ai.* attributes based on the mapping.

Tests
-----

.. code-block:: bash

   pytest util/opentelemetry-util-genai-traceloop-translator/tests

splunk_otel_util_genai_translator_traceloop-0.1.3/README.rst
ADDED
@@ -0,0 +1,68 @@
OpenTelemetry GenAI Traceloop Translator
========================================

This package automatically translates Traceloop-instrumented spans into OpenTelemetry GenAI semantic conventions.
It intercepts spans with ``traceloop.*`` attributes and creates corresponding spans with ``gen_ai.*`` attributes,
enabling seamless integration between Traceloop instrumentation and GenAI observability tools.

Mapping Table
-------------

============================== ================================
Traceloop Key                  Added Key
============================== ================================
``traceloop.workflow.name``    ``gen_ai.workflow.name``
``traceloop.entity.name``      ``gen_ai.agent.name``
``traceloop.entity.path``      ``gen_ai.workflow.path``
``traceloop.correlation.id``   ``gen_ai.conversation.id``
``traceloop.entity.input``     ``gen_ai.input.messages``
``traceloop.entity.output``    ``gen_ai.output.messages``
============================== ================================
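
As an illustration (hypothetical attribute values, not taken from the package), a span carrying the keys in the left column yields a translated counterpart with the corresponding ``gen_ai.*`` keys:

.. code-block:: python

   # As emitted by Traceloop instrumentation
   {"traceloop.workflow.name": "streaming_story", "traceloop.entity.name": "story_agent"}

   # Added on the translated span by this package
   {"gen_ai.workflow.name": "streaming_story", "gen_ai.agent.name": "story_agent"}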

Installation
------------

.. code-block:: bash

   pip install splunk-otel-util-genai-translator-traceloop

Quick Start (Automatic Registration)
------------------------------------

The translator registers itself automatically when the package is installed (via the bundled ``.pth`` hook); no manual setup or explicit import is required.

.. code-block:: python

   import os

   from openai import OpenAI
   from traceloop.sdk import Traceloop
   from traceloop.sdk.decorators import workflow

   client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

   Traceloop.init(app_name="story_service")


   @workflow(name="streaming_story")
   def joke_workflow():
       stream = client.chat.completions.create(
           model="gpt-4o-2024-05-13",
           messages=[{"role": "user", "content": "Tell me a story about opentelemetry"}],
           stream=True,
       )

       for part in stream:
           print(part.choices[0].delta.content or "", end="")
       print()


   joke_workflow()
   # The translator automatically creates new gen_ai.* attributes based on the mapping.

Tests
-----

.. code-block:: bash

   pytest util/opentelemetry-util-genai-traceloop-translator/tests

splunk_otel_util_genai_translator_traceloop-0.1.3/pyproject.toml
ADDED
@@ -0,0 +1,60 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "splunk-otel-util-genai-translator-traceloop"
dynamic = ["version"]
description = "Traceloop -> GenAI translator emitter for OpenTelemetry GenAI"
readme = "README.rst"
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [
    { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" },
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dependencies = [
    "opentelemetry-instrumentation ~= 0.52b1",
    "opentelemetry-semantic-conventions ~= 0.52b1",
    "opentelemetry-api>=1.31.0",
    "opentelemetry-sdk>=1.31.0",
    "splunk-otel-util-genai>=0.1.3",
]

[project.entry-points.opentelemetry_configurator]

[project.optional-dependencies]
test = ["pytest>=7.0.0"]

[project.urls]
Homepage = "https://github.com/open-telemetry/opentelemetry-python-contrib"
Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib"

[tool.hatch.version]
path = "src/opentelemetry/util/genai/version.py"

[tool.hatch.build.targets.sdist]
include = [
    "/src",
    "/tests",
]

[tool.hatch.build.targets.wheel]
packages = ["src/opentelemetry"]
include = [
    "src/opentelemetry_util_genai_traceloop_translator.pth",
]

[tool.hatch.build.targets.wheel.force-include]
"src/opentelemetry_util_genai_traceloop_translator.pth" = "opentelemetry_util_genai_traceloop_translator.pth"

splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/__init__.py
ADDED
@@ -0,0 +1,20 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Traceloop span processor and transformation utilities."""

from .filtering_span_processor import FilteringSpanProcessor
from .traceloop_span_processor import TraceloopSpanProcessor

__all__ = ["TraceloopSpanProcessor", "FilteringSpanProcessor"]
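
A minimal wiring sketch (editor's illustration, not from the package: the bundled .pth hook normally performs this registration automatically, and a no-argument TraceloopSpanProcessor() construction is assumed here):

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.util.genai.processor import TraceloopSpanProcessor

    # Attach the translator so finished traceloop.* spans are re-emitted
    # with the mapped gen_ai.* attributes.
    provider = TracerProvider()
    provider.add_span_processor(TraceloopSpanProcessor())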

splunk_otel_util_genai_translator_traceloop-0.1.3/src/opentelemetry/util/genai/processor/content_normalizer.py
ADDED
@@ -0,0 +1,324 @@
from __future__ import annotations

import json
from typing import Any, Dict, List

# Internal sizing caps (kept private to the module, not exposed via env)
INPUT_MAX = 100
OUTPUT_MAX = 100
MSG_CONTENT_MAX = 16000
PROMPT_TEMPLATE_MAX = 4096


def maybe_truncate_template(value: Any) -> Any:
    if not isinstance(value, str) or len(value) <= PROMPT_TEMPLATE_MAX:
        return value
    return value[:PROMPT_TEMPLATE_MAX] + "…(truncated)"
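

# Illustrative behavior (editor's note, not part of the released module):
#   maybe_truncate_template("x" * 5000) -> the first 4096 chars + "…(truncated)"
#   maybe_truncate_template(42)         -> 42 (non-strings pass through unchanged)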


def _coerce_text_part(content: Any) -> Dict[str, Any]:
    if not isinstance(content, str):
        try:
            content = json.dumps(content)[:MSG_CONTENT_MAX]
        except Exception:
            content = str(content)[:MSG_CONTENT_MAX]
    else:
        content = content[:MSG_CONTENT_MAX]
    return {"type": "text", "content": content}
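

# Illustrative behavior (editor's note, not part of the released module):
# non-string content is JSON-serialized before being wrapped as a text part.
#   _coerce_text_part("hi")     -> {"type": "text", "content": "hi"}
#   _coerce_text_part({"a": 1}) -> {"type": "text", "content": '{"a": 1}'}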


def _extract_langchain_messages(content_val: Any) -> List[Dict[str, Any]]:
    """
    Extract the actual message content from nested LangChain message objects.

    Handles formats like:
    - {"messages": [{"lc": 1, "kwargs": {"content": "text", "type": "human"}}]}
    - {"outputs": {"messages": [{"lc": 1, "kwargs": {"content": "text"}}]}}

    Returns a list of extracted messages with their content and role.
    """
    extracted = []

    try:
        # Parse if it's a JSON string
        if isinstance(content_val, str):
            try:
                content_val = json.loads(content_val)
            except Exception:
                return []  # Not JSON; let the caller handle it

        if not isinstance(content_val, dict):
            return []

        # Unwrap the "outputs" wrapper (common in workflow outputs)
        if "outputs" in content_val and isinstance(content_val["outputs"], dict):
            content_val = content_val["outputs"]

        # Look for a "messages" array
        messages = content_val.get("messages", [])
        if not isinstance(messages, list):
            return []

        # Extract content from each LangChain message
        for msg in messages:
            if not isinstance(msg, dict):
                continue

            # A LangChain message is marked by "lc": 1 and a "kwargs" payload
            if msg.get("lc") == 1 and "kwargs" in msg:
                kwargs = msg["kwargs"]
                if isinstance(kwargs, dict):
                    msg_content = kwargs.get("content")
                    msg_type = kwargs.get("type", "unknown")

                    if msg_content:
                        # Map LangChain message types to GenAI roles
                        if msg_type == "human":
                            role = "user"
                        elif msg_type == "ai":
                            role = "assistant"
                        elif msg_type == "system":
                            role = "system"
                        else:
                            # Infer the role from the message position
                            role = "user" if not extracted else "assistant"

                        extracted.append({"content": msg_content, "role": role})

        return extracted

    except Exception:
        return []
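

# Illustrative behavior (editor's note, not part of the released module):
#   payload = {"messages": [{"lc": 1, "kwargs": {"content": "hello", "type": "human"}}]}
#   _extract_langchain_messages(payload) -> [{"content": "hello", "role": "user"}]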


def normalize_traceloop_content(
    raw: Any, direction: str
) -> List[Dict[str, Any]]:
    """Normalize a traceloop entity input/output blob into the GenAI message schema.

    direction: 'input' | 'output'
    Returns a list of messages: {role, parts, finish_reason?}
    """
    # Already a list of message dicts
    if isinstance(raw, list) and all(isinstance(m, dict) for m in raw):
        normalized: List[Dict[str, Any]] = []
        limit = INPUT_MAX if direction == "input" else OUTPUT_MAX
        for m in raw[:limit]:
            role = m.get(
                "role", "user" if direction == "input" else "assistant"
            )
            content_val = m.get("content")
            if content_val is None:
                temp = {
                    k: v
                    for k, v in m.items()
                    if k not in ("role", "finish_reason", "finishReason")
                }
                content_val = temp or ""

            # Check whether the content itself contains nested LangChain
            # messages; this handles the format where Traceloop serializes
            # workflow inputs/outputs with LangChain message objects embedded
            # in JSON.
            langchain_messages = _extract_langchain_messages(content_val)

            if langchain_messages:
                # Nested LangChain messages were found - extract their content
                for lc_msg in langchain_messages:
                    parts = [_coerce_text_part(lc_msg["content"])]
                    msg: Dict[str, Any] = {"role": lc_msg["role"], "parts": parts}
                    if direction == "output":
                        fr = m.get("finish_reason") or m.get("finishReason") or "stop"
                        msg["finish_reason"] = fr
                    normalized.append(msg)
            else:
                # No nested LangChain messages - use the content as-is
                parts = [_coerce_text_part(content_val)]
                msg = {"role": role, "parts": parts}
                if direction == "output":
                    fr = m.get("finish_reason") or m.get("finishReason") or "stop"
                    msg["finish_reason"] = fr
                normalized.append(msg)

        return normalized

    # Dict variants
    if isinstance(raw, dict):
        # OpenAI-style "choices"
        if (
            direction == "output"
            and "choices" in raw
            and isinstance(raw["choices"], list)
        ):
            out_msgs: List[Dict[str, Any]] = []
            for choice in raw["choices"][:OUTPUT_MAX]:
                if not isinstance(choice, dict):
                    # Guard: a non-dict choice cannot be queried with .get()
                    out_msgs.append(
                        {
                            "role": "assistant",
                            "parts": [_coerce_text_part(choice)],
                            "finish_reason": "stop",
                        }
                    )
                    continue
                message = choice.get("message")
                if message and isinstance(message, dict):
                    role = message.get("role", "assistant")
                    content_val = (
                        message.get("content") or message.get("text") or ""
                    )
                else:
                    role = "assistant"
                    content_val = (
                        choice.get("text")
                        or choice.get("content")
                        or json.dumps(choice)
                    )
                parts = [_coerce_text_part(content_val)]
                finish_reason = (
                    choice.get("finish_reason")
                    or choice.get("finishReason")
                    or "stop"
                )
                out_msgs.append(
                    {
                        "role": role,
                        "parts": parts,
                        "finish_reason": finish_reason,
                    }
                )
            return out_msgs
        # Gemini-style "candidates"
        if (
            direction == "output"
            and "candidates" in raw
            and isinstance(raw["candidates"], list)
        ):
            out_msgs = []
            for cand in raw["candidates"][:OUTPUT_MAX]:
                role = cand.get("role", "assistant")
                cand_content = cand.get("content")
                if isinstance(cand_content, list):
                    content_val = "\n".join(
                        str(p.get("text", p.get("content", p)))
                        if isinstance(p, dict)
                        else str(p)  # guard: parts may be plain strings
                        for p in cand_content
                    )
                else:
                    content_val = cand_content or json.dumps(cand)
                parts = [_coerce_text_part(content_val)]
                finish_reason = (
                    cand.get("finish_reason")
                    or cand.get("finishReason")
                    or "stop"
                )
                out_msgs.append(
                    {
                        "role": role,
                        "parts": parts,
                        "finish_reason": finish_reason,
                    }
                )
            return out_msgs
        # A bare "messages" array
        if "messages" in raw and isinstance(raw["messages"], list):
            return normalize_traceloop_content(raw["messages"], direction)
        # An "args" wrapper (LangGraph/Traceloop format with function call args)
        if "args" in raw and isinstance(raw["args"], list) and len(raw["args"]) > 0:
            # The first arg usually contains the messages and other params
            first_arg = raw["args"][0]
            if isinstance(first_arg, dict):
                # Recurse; this will find a nested "messages" array if present
                return normalize_traceloop_content(first_arg, direction)
        # An "inputs" wrapper
        if "inputs" in raw:
            inner = raw["inputs"]
            if isinstance(inner, list):
                return normalize_traceloop_content(inner, direction)
            if isinstance(inner, dict):
                # Recurse; the dict might contain a "messages" array
                return normalize_traceloop_content(inner, direction)
        # Tool calls
        if (
            direction == "output"
            and "tool_calls" in raw
            and isinstance(raw["tool_calls"], list)
        ):
            out_msgs = []
            for tc in raw["tool_calls"][:OUTPUT_MAX]:
                part = {
                    "type": "tool_call",
                    "name": tc.get("name", "tool"),
                    "arguments": tc.get("arguments"),
                    "id": tc.get("id"),
                }
                finish_reason = (
                    tc.get("finish_reason")
                    or tc.get("finishReason")
                    or "tool_call"
                )
                out_msgs.append(
                    {
                        "role": "assistant",
                        "parts": [part],
                        "finish_reason": finish_reason,
                    }
                )
            return out_msgs
        # Fallback for other dicts: treat everything except "role" as content
        body = {k: v for k, v in raw.items() if k != "role"}
        if direction == "output":
            return [
                {
                    "role": "assistant",
                    "parts": [_coerce_text_part(body)],
                    "finish_reason": "stop",
                }
            ]
        return [{"role": "user", "parts": [_coerce_text_part(body)]}]

    # JSON string
    if isinstance(raw, str):
        try:
            parsed = json.loads(raw)
            return normalize_traceloop_content(parsed, direction)
        except Exception:
            if direction == "output":
                return [
                    {
                        "role": "assistant",
                        "parts": [_coerce_text_part(raw)],
                        "finish_reason": "stop",
                    }
                ]
            return [{"role": "user", "parts": [_coerce_text_part(raw)]}]

    # List of raw strings
    if isinstance(raw, list) and all(isinstance(s, str) for s in raw):
        msgs: List[Dict[str, Any]] = []
        limit = INPUT_MAX if direction == "input" else OUTPUT_MAX
        for s in raw[:limit]:
            msgs.append(
                {
                    "role": "user" if direction == "input" else "assistant",
                    "parts": [_coerce_text_part(s)],
                }
            )
        return msgs

    # Generic fallback
    if direction == "output":
        return [
            {
                "role": "assistant",
                "parts": [_coerce_text_part(raw)],
                "finish_reason": "stop",
            }
        ]
    return [{"role": "user", "parts": [_coerce_text_part(raw)]}]
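

# Illustrative behavior (editor's note, not part of the released module):
#   raw = {"choices": [{"message": {"role": "assistant", "content": "Hi"},
#                       "finish_reason": "stop"}]}
#   normalize_traceloop_content(raw, "output")
#   -> [{"role": "assistant",
#        "parts": [{"type": "text", "content": "Hi"}],
#        "finish_reason": "stop"}]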


__all__ = [
    "normalize_traceloop_content",
    "maybe_truncate_template",
    "INPUT_MAX",
    "OUTPUT_MAX",
    "MSG_CONTENT_MAX",
    "PROMPT_TEMPLATE_MAX",
]