turingpulse-sdk-autogen 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Virtual environments
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
dist/
|
|
13
|
+
build/
|
|
14
|
+
*.egg-info/
|
|
15
|
+
|
|
16
|
+
# Database files
|
|
17
|
+
*.db
|
|
18
|
+
*.sqlite3
|
|
19
|
+
|
|
20
|
+
# Environment variables
|
|
21
|
+
.env
|
|
22
|
+
.env.local
|
|
23
|
+
|
|
24
|
+
# IDE
|
|
25
|
+
.idea/
|
|
26
|
+
.vscode/
|
|
27
|
+
*.swp
|
|
28
|
+
*.swo
|
|
29
|
+
|
|
30
|
+
# Testing
|
|
31
|
+
.pytest_cache/
|
|
32
|
+
.coverage
|
|
33
|
+
htmlcov/
|
|
34
|
+
.tox/
|
|
35
|
+
|
|
36
|
+
# Logs
|
|
37
|
+
*.log
|
|
38
|
+
logs/
|
|
39
|
+
|
|
40
|
+
# OS files
|
|
41
|
+
.DS_Store
|
|
42
|
+
Thumbs.db
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: turingpulse-sdk-autogen
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: TuringPulse SDK integration for AutoGen
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: pyautogen>=0.7.5
|
|
8
|
+
Requires-Dist: turingpulse-sdk>=1.0.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "turingpulse-sdk-autogen"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "TuringPulse SDK integration for AutoGen"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"turingpulse-sdk>=1.0.0",
|
|
13
|
+
"pyautogen>=0.7.5",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""AutoGen 0.2.x (pyautogen) instrumentation for TuringPulse SDK.
|
|
2
|
+
|
|
3
|
+
Wraps ``user_proxy.initiate_chat()`` to capture:
|
|
4
|
+
|
|
5
|
+
- Aggregate token / cost from ``chat_result.cost`` (keyed by model).
|
|
6
|
+
- Per-turn child spans by walking ``chat_result.chat_history``.
|
|
7
|
+
- Tool-call spans when messages contain ``tool_calls`` or ``role == "tool"``.
|
|
8
|
+
- LLM reasoning spans for assistant messages.
|
|
9
|
+
|
|
10
|
+
**Known shortfalls (AutoGen 0.2.x)**
|
|
11
|
+
|
|
12
|
+
1. ``chat_result.cost`` is a model-level aggregate — per-turn token
|
|
13
|
+
counts are **not available**. We distribute totals evenly across
|
|
14
|
+
LLM spans.
|
|
15
|
+
2. Tool execution results in chat history don't include timing info.
|
|
16
|
+
3. ``chat_result.summary`` is often a plain string, not a structured
|
|
17
|
+
dict — token extraction from summary is unreliable.
|
|
18
|
+
|
|
19
|
+
Usage::
|
|
20
|
+
|
|
21
|
+
from turingpulse_sdk.integrations.autogen import instrument_autogen
|
|
22
|
+
|
|
23
|
+
run_workflow = instrument_autogen(
|
|
24
|
+
user_proxy, assistant,
|
|
25
|
+
name="my-autogen-workflow",
|
|
26
|
+
)
|
|
27
|
+
result = run_workflow(message="Handle this request", max_turns=10)
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import json
|
|
33
|
+
import logging
|
|
34
|
+
import time
|
|
35
|
+
from typing import Any, Callable, Dict, List, Optional, Sequence
|
|
36
|
+
|
|
37
|
+
from turingpulse_sdk.config import MAX_FIELD_SIZE
|
|
38
|
+
from turingpulse_sdk.context import current_context
|
|
39
|
+
from turingpulse_sdk import instrument, GovernanceDirective
|
|
40
|
+
from turingpulse_sdk.integrations.base import emit_child_spans
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger("turingpulse.sdk.integrations.autogen")
|
|
43
|
+
|
|
44
|
+
# Framework identifier assigned to ``ctx.framework`` and passed to
# ``emit_child_spans`` as ``framework``.
FRAMEWORK_NAME = "autogen"
|
|
45
|
+
# Version string reported as ``autogen_version`` in child-span metadata.
# NOTE(review): this is a hard-coded literal, not read from the installed
# package, and the package's pyproject.toml requires ``pyautogen>=0.7.5`` —
# confirm which AutoGen version this integration actually targets.
FRAMEWORK_VERSION = "0.2.35"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def instrument_autogen(
    user_proxy: Any,
    assistant: Any,
    *,
    name: str,
    governance: Optional[GovernanceDirective] = None,
    model: str = "gpt-4o-mini",
    provider: str = "openai",
    tool_names: Optional[List[str]] = None,
    kpis: Optional[Sequence["KPIConfig"]] = None,
    metadata: Optional[Dict[str, str]] = None,
) -> Callable[..., Any]:
    """Instrument an AutoGen chat for TuringPulse observability.

    Returns a callable that runs ``user_proxy.initiate_chat(assistant, …)``
    and emits a root workflow span plus per-turn child spans.

    Args:
        user_proxy: The ``ConversableAgent`` acting as user proxy.
        assistant: The ``ConversableAgent`` acting as the LLM assistant.
        name: Workflow display name for TuringPulse.
        governance: Optional governance directive for policy enforcement.
        model: LLM model name (default ``gpt-4o-mini``).
        provider: LLM provider (default ``openai``).
        tool_names: Names of registered tools for metadata.
        kpis: Optional KPI configurations, forwarded unchanged to
            ``instrument``.
        metadata: Optional metadata mapping, forwarded to ``instrument``
            (an empty dict is used when omitted).

    Returns:
        A callable ``run(message=..., **chat_kwargs) -> ChatResult``.
    """
    # Read once at instrumentation time; later changes to the assistant's
    # system message are not reflected in emitted spans.
    system_msg = getattr(assistant, "system_message", "") or ""

    @instrument(name=name, governance=governance, kpis=kpis, metadata=metadata or {})
    def run(message: str, **chat_kwargs: Any) -> Any:
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system wall clock is adjusted.
        started = time.perf_counter()
        chat_result = user_proxy.initiate_chat(
            assistant,
            message=message,
            **chat_kwargs,
        )
        total_duration_ms = int((time.perf_counter() - started) * 1000)

        ctx = current_context()
        if ctx is None:
            logger.warning("instrument_autogen: no active context")
            return chat_result

        # The chat has already completed at this point, so telemetry must not
        # raise: a non-string message is recorded via its ``str()`` form
        # instead of being sliced directly.
        prompt_text = message if isinstance(message, str) else str(message)

        # ── Extract aggregate cost / tokens ──
        total_prompt, total_completion, total_cost = _extract_cost(chat_result)

        ctx.set_tokens(total_prompt, total_completion)
        ctx.set_cost(total_cost)
        ctx.set_model(model, provider)
        ctx.set_prompt(prompt_text[:MAX_FIELD_SIZE], system_msg[:MAX_FIELD_SIZE])
        ctx.framework = FRAMEWORK_NAME
        ctx.node_type = "workflow"

        # Final output — chosen by ``_extract_final_output`` from the history
        chat_history = getattr(chat_result, "chat_history", []) or []
        final_output = _extract_final_output(chat_history)
        ctx.set_io(
            input_data=prompt_text[:MAX_FIELD_SIZE],
            output_data=final_output[:MAX_FIELD_SIZE],
        )

        if tool_names:
            ctx.available_tools = tool_names

        # ── Build child spans from chat history ──
        child_spans = _build_chat_spans(
            chat_history, total_duration_ms,
            total_prompt, total_completion, total_cost,
            model, provider, prompt_text, system_msg,
            tool_names or [],
        )
        emit_child_spans(
            child_spans,
            run_id=ctx.run_id,
            parent_span_id=ctx.span_id,
            workflow_name=name,
            framework=FRAMEWORK_NAME,
        )

        return chat_result

    return run
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ── Helpers ──────────────────────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _extract_cost(chat_result: Any) -> tuple[int, int, float]:
    """Extract total tokens and cost from ``chat_result.cost``.

    AutoGen 0.2.x ``ChatResult.cost`` has a nested structure::

        {
            "usage_including_cached_inference": {
                "total_cost": 0.000548,
                "gpt-4o-mini-2024-07-18": {
                    "cost": 0.000548,
                    "prompt_tokens": 2425,
                    "completion_tokens": 308,
                    "total_tokens": 2733,
                }
            },
            "usage_excluding_cached_inference": { ... }
        }

    ``usage_including_cached_inference`` is preferred for the complete
    picture, then ``usage_excluding_cached_inference``; if neither is a dict,
    ``cost`` itself is treated as a flat ``{model: usage}`` map.

    Args:
        chat_result: Any object; its ``cost`` attribute is read if present.

    Returns:
        ``(prompt_tokens, completion_tokens, cost)`` summed over every
        per-model entry (a dict value containing ``prompt_tokens``).
        ``(0, 0, 0.0)`` when ``cost`` is missing or not a dict. Fields that
        are absent or ``None`` count as zero.
    """
    cost_info = getattr(chat_result, "cost", None)
    if not isinstance(cost_info, dict):
        return 0, 0, 0.0

    usage_data = cost_info  # flat-format fallback
    for section in (
        "usage_including_cached_inference",
        "usage_excluding_cached_inference",
    ):
        candidate = cost_info.get(section)
        if isinstance(candidate, dict):
            usage_data = candidate
            break

    total_prompt = 0
    total_completion = 0
    total_cost = 0.0
    for usage in usage_data.values():
        # Scalars such as "total_cost" sit beside the per-model dicts; only
        # per-model entries carry token counts.
        if not isinstance(usage, dict) or "prompt_tokens" not in usage:
            continue
        # ``or 0`` guards against keys that are present but set to None,
        # which would otherwise raise TypeError on ``+=``.
        total_prompt += usage.get("prompt_tokens") or 0
        total_completion += usage.get("completion_tokens") or 0
        total_cost += usage.get("cost") or 0.0

    return total_prompt, total_completion, total_cost
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _extract_final_output(chat_history: List[Dict[str, Any]]) -> str:
    """Pick the text to report as the conversation's final output.

    Scans the history newest-first and returns the first non-empty
    ``content`` that does not contain ``TERMINATE``. Messages are not
    filtered by role. If every non-empty message contains ``TERMINATE``,
    the newest one is returned with that marker removed and surrounding
    whitespace stripped. Returns ``""`` when no message has content.
    """

    def newest_first():
        # Lazy on purpose: only as many messages as needed are inspected.
        return ((entry.get("content", "") or "") for entry in reversed(chat_history))

    preferred = next(
        (text for text in newest_first() if text and "TERMINATE" not in text),
        None,
    )
    if preferred is not None:
        return preferred

    fallback = next((text for text in newest_first() if text), None)
    if fallback is None:
        return ""
    return fallback.replace("TERMINATE", "").strip()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _classify_message(msg: Dict[str, Any]) -> Optional[str]:
    """Return the span kind for a chat message, or ``None`` if it yields no span."""
    if msg.get("tool_calls"):
        return "assistant_with_tools"
    if msg.get("role") == "tool":
        return "tool_result"
    role = msg.get("role", msg.get("name", "unknown"))
    if (msg.get("content", "") or "") and role != "user":
        return "assistant_response"
    return None


def _even_share(total: int, parts: int, index: int) -> int:
    """Return the ``index``-th (0-based) share of ``total`` split into ``parts``.

    The remainder goes one unit at a time to the earliest shares, so the
    shares always sum to exactly ``total``.
    """
    base, remainder = divmod(total, parts)
    return base + (1 if index < remainder else 0)


def _link_tool_calls(
    tool_calls: List[Dict[str, Any]],
    tool_result_map: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Build tool-call records, attaching each call's result looked up by id."""
    records: List[Dict[str, Any]] = []
    for call in tool_calls:
        # ``function`` may be present but None; treat that like a missing key.
        fn = call.get("function") or {}
        args_raw = fn.get("arguments", "{}")
        try:
            args = json.loads(args_raw) if isinstance(args_raw, str) else (args_raw or {})
        except (json.JSONDecodeError, TypeError):
            args = {"raw": str(args_raw)[:500]}
        call_id = call.get("id", "")

        linked_result = tool_result_map.get(call_id)
        has_result = linked_result is not None
        records.append({
            "tool_name": fn.get("name", "unknown"),
            "tool_args": args,
            "tool_result": str(linked_result)[:MAX_FIELD_SIZE] if linked_result else "",
            "tool_id": call_id,
            "success": has_result,
            "error_message": None if has_result else "No tool result found in chat history",
        })
    return records


def _build_chat_spans(
    chat_history: List[Dict[str, Any]],
    total_duration_ms: int,
    total_prompt: int,
    total_completion: int,
    total_cost: float,
    model: str,
    provider: str,
    user_message: str,
    system_msg: str,
    tool_names: List[str],
) -> List[Dict[str, Any]]:
    """Walk AutoGen chat history and build child span dicts.

    One span is produced per message that either carries ``tool_calls``
    (``llm_reasoning_N``), has ``role == "tool"`` (``tool_<name>``), or has
    non-empty content and a role other than ``"user"``
    (``assistant_response_N``). Other messages produce no span.

    The history has no per-message timing or token data, so:

    * every span gets ``total_duration_ms // max(len(chat_history), 1)``;
    * prompt/completion totals are split across the LLM spans actually
      emitted, with any remainder given to the earliest spans so the
      children sum to the totals;
    * ``cost_usd`` is always 0 — ``total_cost`` is accepted but not used.

    NOTE(review): AutoGen 0.2.x appears to record ``chat_history`` from the
    initiating agent's point of view, which may invert "user"/"assistant"
    relative to what this classification expects — confirm against real
    histories.

    Args:
        chat_history: Message dicts as found on ``ChatResult.chat_history``.
        total_duration_ms: Wall time of the whole chat, in milliseconds.
        total_prompt: Aggregate prompt tokens for the chat.
        total_completion: Aggregate completion tokens for the chat.
        total_cost: Aggregate cost; unused (see above).
        model: Model name recorded on LLM spans.
        provider: Provider name recorded on LLM spans.
        user_message: Initial user message, recorded as each LLM span's prompt.
        system_msg: Assistant system message, recorded on each LLM span.
        tool_names: Tool names recorded as ``available_tools``.

    Returns:
        Span dicts in chat-history order.
    """
    spans: List[Dict[str, Any]] = []

    per_message_ms = total_duration_ms // max(len(chat_history), 1)

    # Classify once so the token split and the emitted spans cannot disagree
    # about which messages count as LLM turns.
    kinds = [_classify_message(msg) for msg in chat_history]
    num_llm = sum(1 for kind in kinds if kind in ("assistant_with_tools", "assistant_response"))
    per_llm_cost = 0  # Backend calculates cost based on model pricing

    # Pass 1: map tool_call_id -> result content from every tool-result
    # message, so results can be linked to calls regardless of ordering.
    tool_result_map: Dict[str, Any] = {}
    for msg in chat_history:
        if msg.get("role") == "tool":
            call_id = msg.get("tool_call_id", "")
            if call_id:
                tool_result_map[call_id] = msg.get("content", "") or ""

    # Pass 2: emit spans, linking tool results into their tool-call records.
    llm_count = 0
    tool_count = 0
    for i, (msg, kind) in enumerate(zip(chat_history, kinds)):
        if kind is None:
            continue

        role = msg.get("role", msg.get("name", "unknown"))
        content = msg.get("content", "") or ""

        if kind == "tool_result":
            tool_count += 1
            tool_name = msg.get("name", f"tool_{tool_count}")
            spans.append({
                "node": f"tool_{tool_name}",
                "node_type": "tool",
                "duration_ms": per_message_ms,
                "status": "success",
                "input": {"tool_call_id": msg.get("tool_call_id", "")},
                "output": {"result": content[:MAX_FIELD_SIZE]},
                "available_tools": tool_names,
                "metadata": {
                    "autogen_tool_name": tool_name,
                    "autogen_message_type": "tool_result",
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })
            continue

        # Both remaining kinds are LLM turns and share one counter.
        tokens = {
            "prompt": _even_share(total_prompt, num_llm, llm_count),
            "completion": _even_share(total_completion, num_llm, llm_count),
        }
        llm_count += 1

        if kind == "assistant_with_tools":
            tool_calls = msg.get("tool_calls", [])
            spans.append({
                "node": f"llm_reasoning_{llm_count}",
                "node_type": "llm",
                "duration_ms": per_message_ms,
                "status": "success",
                "prompt": user_message[:MAX_FIELD_SIZE],
                "system_prompt": system_msg[:MAX_FIELD_SIZE],
                "input": {"message_index": i, "role": role},
                "output": {"content": content[:MAX_FIELD_SIZE], "tool_calls_count": len(tool_calls)},
                "model": model,
                "provider": provider,
                "tokens": tokens,
                "cost_usd": round(per_llm_cost, 6),
                "tool_calls": _link_tool_calls(tool_calls, tool_result_map),
                "available_tools": tool_names,
                "metadata": {
                    "autogen_agent": str(role),
                    "autogen_message_type": "assistant_with_tools",
                    "autogen_conversation_turn": str(i),
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })
        else:
            spans.append({
                "node": f"assistant_response_{llm_count}",
                "node_type": "llm",
                "duration_ms": per_message_ms,
                "status": "success",
                "prompt": user_message[:MAX_FIELD_SIZE],
                "system_prompt": system_msg[:MAX_FIELD_SIZE],
                "input": {"conversation_context": f"Turn {i} in AutoGen chat"},
                "output": {"response": content[:MAX_FIELD_SIZE]},
                "model": model,
                "provider": provider,
                "tokens": tokens,
                "cost_usd": round(per_llm_cost, 6),
                "metadata": {
                    "autogen_agent": str(role),
                    "autogen_message_type": "assistant_response",
                    "autogen_conversation_turn": str(i),
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })

    return spans
|