turingpulse-sdk-autogen 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ venv/
9
+ ENV/
10
+
11
+ # Distribution / packaging
12
+ dist/
13
+ build/
14
+ *.egg-info/
15
+
16
+ # Database files
17
+ *.db
18
+ *.sqlite3
19
+
20
+ # Environment variables
21
+ .env
22
+ .env.local
23
+
24
+ # IDE
25
+ .idea/
26
+ .vscode/
27
+ *.swp
28
+ *.swo
29
+
30
+ # Testing
31
+ .pytest_cache/
32
+ .coverage
33
+ htmlcov/
34
+ .tox/
35
+
36
+ # Logs
37
+ *.log
38
+ logs/
39
+
40
+ # OS files
41
+ .DS_Store
42
+ Thumbs.db
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: turingpulse-sdk-autogen
3
+ Version: 1.0.0
4
+ Summary: TuringPulse SDK integration for AutoGen
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: pyautogen>=0.7.5
8
+ Requires-Dist: turingpulse-sdk>=1.0.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
11
+ Requires-Dist: pytest>=8.0; extra == 'dev'
@@ -0,0 +1,17 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "turingpulse-sdk-autogen"
7
+ version = "1.0.0"
8
+ description = "TuringPulse SDK integration for AutoGen"
9
+ requires-python = ">=3.11"
10
+ license = "Apache-2.0"
11
+ dependencies = [
12
+ "turingpulse-sdk>=1.0.0",
13
+ "pyautogen>=0.7.5",
14
+ ]
15
+
16
+ [project.optional-dependencies]
17
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
@@ -0,0 +1,6 @@
1
"""TuringPulse SDK integration for AutoGen."""

from ._wrapper import instrument_autogen

# Must match ``version`` in pyproject.toml / the published package metadata
# (1.0.0); the previous literal "0.1.0" disagreed with the released version.
__version__ = "1.0.0"
__all__ = ["instrument_autogen"]
@@ -0,0 +1,334 @@
1
+ """AutoGen 0.2.x (pyautogen) instrumentation for TuringPulse SDK.
2
+
3
+ Wraps ``user_proxy.initiate_chat()`` to capture:
4
+
5
+ - Aggregate token / cost from ``chat_result.cost`` (keyed by model).
6
+ - Per-turn child spans by walking ``chat_result.chat_history``.
7
+ - Tool-call spans when messages contain ``tool_calls`` or ``role == "tool"``.
8
+ - LLM reasoning spans for assistant messages.
9
+
10
+ **Known shortfalls (AutoGen 0.2.x)**
11
+
12
+ 1. ``chat_result.cost`` is a model-level aggregate — per-turn token
13
+ counts are **not available**. We distribute totals evenly across
14
+ LLM spans.
15
+ 2. Tool execution results in chat history don't include timing info.
16
+ 3. ``chat_result.summary`` is often a plain string, not a structured
17
+ dict — token extraction from summary is unreliable.
18
+
19
+ Usage::
20
+
21
+ from turingpulse_sdk.integrations.autogen import instrument_autogen
22
+
23
+ run_workflow = instrument_autogen(
24
+ user_proxy, assistant,
25
+ name="my-autogen-workflow",
26
+ )
27
+ result = run_workflow(message="Handle this request", max_turns=10)
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ import logging
34
+ import time
35
+ from typing import Any, Callable, Dict, List, Optional, Sequence
36
+
37
+ from turingpulse_sdk.config import MAX_FIELD_SIZE
38
+ from turingpulse_sdk.context import current_context
39
+ from turingpulse_sdk import instrument, GovernanceDirective
40
+ from turingpulse_sdk.integrations.base import emit_child_spans
41
+
42
logger = logging.getLogger("turingpulse.sdk.integrations.autogen")

FRAMEWORK_NAME = "autogen"


def _detect_framework_version(default: str = "0.2.35") -> str:
    """Return the installed ``pyautogen`` version, or *default* if unknown.

    The value ends up in the ``autogen_version`` metadata of every emitted
    span, so it should describe the library that actually ran rather than a
    fixed literal.
    """
    # Imported locally so the module's top-level import block is untouched.
    from importlib import metadata

    try:
        return metadata.version("pyautogen")
    except metadata.PackageNotFoundError:
        # Distribution metadata unavailable (e.g. vendored install): keep the
        # historical constant so the metadata field is never empty.
        return default


FRAMEWORK_VERSION = _detect_framework_version()
46
+
47
+
48
def instrument_autogen(
    user_proxy: Any,
    assistant: Any,
    *,
    name: str,
    governance: Optional[GovernanceDirective] = None,
    model: str = "gpt-4o-mini",
    provider: str = "openai",
    tool_names: Optional[List[str]] = None,
    kpis: Optional[Sequence["KPIConfig"]] = None,
    metadata: Optional[Dict[str, str]] = None,
) -> Callable[..., Any]:
    """Instrument an AutoGen chat for TuringPulse observability.

    Returns a callable that runs ``user_proxy.initiate_chat(assistant, …)``
    and emits a root workflow span plus per-turn child spans.

    Args:
        user_proxy: The ``ConversableAgent`` acting as user proxy.
        assistant: The ``ConversableAgent`` acting as the LLM assistant.
        name: Workflow display name for TuringPulse.
        governance: Optional governance directive for policy enforcement.
        model: LLM model name (default ``gpt-4o-mini``).
        provider: LLM provider (default ``openai``).
        tool_names: Names of registered tools for metadata.
        kpis: Optional KPI configurations, forwarded unchanged to the
            ``instrument`` decorator.
        metadata: Optional string metadata, forwarded to the ``instrument``
            decorator (an empty dict is used when omitted).

    Returns:
        A callable ``run(message=..., **chat_kwargs) -> ChatResult``. The
        chat result is returned untouched; if no TuringPulse context is
        active after the chat, a warning is logged and no spans are emitted.
    """
    # Read once, when the wrapper is built; later changes to the assistant's
    # system message are not reflected in the recorded prompt.
    system_msg = getattr(assistant, "system_message", "") or ""

    @instrument(name=name, governance=governance, kpis=kpis, metadata=metadata or {})
    def run(message: str, **chat_kwargs: Any) -> Any:
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system wall clock is adjusted mid-chat.
        started = time.perf_counter()
        chat_result = user_proxy.initiate_chat(
            assistant,
            message=message,
            **chat_kwargs,
        )
        total_duration_ms = int((time.perf_counter() - started) * 1000)

        ctx = current_context()
        if ctx is None:
            logger.warning("instrument_autogen: no active context")
            return chat_result

        # The message is forwarded to AutoGen as-is, but everything recorded
        # below slices it as text. Coerce non-string messages so telemetry
        # cannot raise after the chat has already completed.
        message_text = message if isinstance(message, str) else str(message)

        # ── Extract aggregate cost / tokens ──
        total_prompt, total_completion, total_cost = _extract_cost(chat_result)

        ctx.set_tokens(total_prompt, total_completion)
        ctx.set_cost(total_cost)
        ctx.set_model(model, provider)
        ctx.set_prompt(message_text[:MAX_FIELD_SIZE], system_msg[:MAX_FIELD_SIZE])
        ctx.framework = FRAMEWORK_NAME
        ctx.node_type = "workflow"

        # Final output — taken from the tail of the chat history
        chat_history = getattr(chat_result, "chat_history", []) or []
        final_output = _extract_final_output(chat_history)
        ctx.set_io(
            input_data=message_text[:MAX_FIELD_SIZE],
            output_data=final_output[:MAX_FIELD_SIZE],
        )

        if tool_names:
            ctx.available_tools = tool_names

        # ── Build child spans from chat history ──
        child_spans = _build_chat_spans(
            chat_history, total_duration_ms,
            total_prompt, total_completion, total_cost,
            model, provider, message_text, system_msg,
            tool_names or [],
        )
        emit_child_spans(
            child_spans,
            run_id=ctx.run_id,
            parent_span_id=ctx.span_id,
            workflow_name=name,
            framework=FRAMEWORK_NAME,
        )

        return chat_result

    return run
130
+
131
+
132
+ # ── Helpers ──────────────────────────────────────────────────────────────
133
+
134
+
135
+ def _extract_cost(chat_result: Any) -> tuple[int, int, float]:
136
+ """Extract total tokens and cost from ``chat_result.cost``.
137
+
138
+ AutoGen 0.2.x ``ChatResult.cost`` has a nested structure::
139
+
140
+ {
141
+ "usage_including_cached_inference": {
142
+ "total_cost": 0.000548,
143
+ "gpt-4o-mini-2024-07-18": {
144
+ "cost": 0.000548,
145
+ "prompt_tokens": 2425,
146
+ "completion_tokens": 308,
147
+ "total_tokens": 2733,
148
+ }
149
+ },
150
+ "usage_excluding_cached_inference": { ... }
151
+ }
152
+
153
+ We prefer ``usage_including_cached_inference`` for the complete picture.
154
+ """
155
+ cost_info = getattr(chat_result, "cost", {}) or {}
156
+ total_prompt = 0
157
+ total_completion = 0
158
+ total_cost = 0.0
159
+
160
+ if not isinstance(cost_info, dict):
161
+ return total_prompt, total_completion, total_cost
162
+
163
+ # Prefer usage_including_cached_inference, fallback to any key
164
+ usage_data = cost_info.get("usage_including_cached_inference")
165
+ if not isinstance(usage_data, dict):
166
+ usage_data = cost_info.get("usage_excluding_cached_inference")
167
+ if not isinstance(usage_data, dict):
168
+ # Try treating cost_info itself as the usage map (flat format)
169
+ usage_data = cost_info
170
+
171
+ if isinstance(usage_data, dict):
172
+ for key, value in usage_data.items():
173
+ if isinstance(value, dict) and "prompt_tokens" in value:
174
+ total_prompt += value.get("prompt_tokens", 0)
175
+ total_completion += value.get("completion_tokens", 0)
176
+ total_cost += value.get("cost", 0.0)
177
+
178
+ return total_prompt, total_completion, total_cost
179
+
180
+
181
+ def _extract_final_output(chat_history: List[Dict[str, Any]]) -> str:
182
+ """Get the last non-TERMINATE assistant message."""
183
+ for msg in reversed(chat_history):
184
+ content = msg.get("content", "") or ""
185
+ if content and "TERMINATE" not in content:
186
+ return content
187
+ for msg in reversed(chat_history):
188
+ content = msg.get("content", "") or ""
189
+ if content:
190
+ return content.replace("TERMINATE", "").strip()
191
+ return ""
192
+
193
+
194
def _index_tool_results(chat_history: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Map each ``tool_call_id`` to the content of its tool-result message.

    Two layouts are recognised on ``role == "tool"`` messages:

    * a per-call list under ``tool_responses`` (the aggregated form in which
      AutoGen's ``ConversableAgent`` records tool replies), and
    * a ``tool_call_id`` directly on the message (OpenAI-style).

    When both are present for the same id, the top-level entry wins.
    """
    results: Dict[str, Any] = {}
    for msg in chat_history:
        if msg.get("role") != "tool":
            continue
        for response in msg.get("tool_responses") or []:
            if not isinstance(response, dict):
                continue
            response_id = response.get("tool_call_id", "")
            if response_id:
                results[response_id] = response.get("content", "") or ""
        tc_id = msg.get("tool_call_id", "")
        if tc_id:
            results[tc_id] = msg.get("content", "") or ""
    return results


def _build_chat_spans(
    chat_history: List[Dict[str, Any]],
    total_duration_ms: int,
    total_prompt: int,
    total_completion: int,
    total_cost: float,
    model: str,
    provider: str,
    user_message: str,
    system_msg: str,
    tool_names: List[str],
) -> List[Dict[str, Any]]:
    """Walk AutoGen chat history and build child span dicts.

    Each message yields at most one span:

    * a message with ``tool_calls`` -> ``llm_reasoning_<n>`` (LLM span that
      also lists every requested tool call and its linked result);
    * a ``role == "tool"`` message -> ``tool_<name>`` span;
    * any other message with content whose role is not ``"user"`` ->
      ``assistant_response_<n>`` (LLM span).

    AutoGen exposes neither per-turn timing nor per-turn token counts, so
    the total duration is split evenly over all messages and the token
    totals evenly over the LLM messages (integer division; remainders are
    dropped).

    Args:
        chat_history: Message dicts from ``chat_result.chat_history``.
        total_duration_ms: Duration of the whole chat in milliseconds.
        total_prompt: Aggregate prompt tokens for the chat.
        total_completion: Aggregate completion tokens for the chat.
        total_cost: Aggregate cost. Accepted for signature compatibility but
            not distributed: per-span ``cost_usd`` is always reported as 0.
        model: Model name recorded on LLM spans.
        provider: Provider name recorded on LLM spans.
        user_message: Initial user message, recorded as the span prompt.
        system_msg: Assistant system message, recorded as the system prompt.
        tool_names: Registered tool names, recorded as ``available_tools``.

    Returns:
        Span dicts in chat-history order.
    """
    spans: List[Dict[str, Any]] = []

    num_messages = max(len(chat_history), 1)
    per_message_ms = total_duration_ms // num_messages

    llm_messages = [
        m for m in chat_history
        if m.get("tool_calls") or (
            m.get("content") and m.get("role") not in ("user", "tool")
        )
    ]
    num_llm = max(len(llm_messages), 1)
    per_llm_prompt = total_prompt // num_llm
    per_llm_completion = total_completion // num_llm
    per_llm_cost = 0  # Backend calculates cost based on model pricing

    llm_count = 0
    tool_count = 0

    # Pass 1: index every tool result by tool_call_id so that the tool-call
    # records built in pass 2 can report success/error from the matching
    # result, wherever in the history that result appears.
    tool_result_map = _index_tool_results(chat_history)

    # Pass 2: Process messages and create spans with linked tool results
    for i, msg in enumerate(chat_history):
        role = msg.get("role", msg.get("name", "unknown"))
        content = msg.get("content", "") or ""
        tool_calls = msg.get("tool_calls", [])

        if tool_calls:
            llm_count += 1
            tc_data = []
            for tc in tool_calls:
                # ``function`` may be present but None; treat it as empty.
                fn = tc.get("function") or {}
                args_raw = fn.get("arguments", "{}")
                try:
                    args = json.loads(args_raw) if isinstance(args_raw, str) else (args_raw or {})
                except (json.JSONDecodeError, TypeError):
                    # Keep a bounded raw copy when the arguments are not JSON.
                    args = {"raw": str(args_raw)[:500]}
                tc_id = tc.get("id", "")

                # Link tool result from the result map
                linked_result = tool_result_map.get(tc_id)
                has_result = linked_result is not None

                tc_data.append({
                    "tool_name": fn.get("name", "unknown"),
                    "tool_args": args,
                    "tool_result": str(linked_result)[:MAX_FIELD_SIZE] if linked_result else "",
                    "tool_id": tc_id,
                    "success": has_result,
                    "error_message": None if has_result else "No tool result found in chat history",
                })

            spans.append({
                "node": f"llm_reasoning_{llm_count}",
                "node_type": "llm",
                "duration_ms": per_message_ms,
                "status": "success",
                "prompt": user_message[:MAX_FIELD_SIZE],
                "system_prompt": system_msg[:MAX_FIELD_SIZE],
                "input": {"message_index": i, "role": role},
                "output": {"content": content[:MAX_FIELD_SIZE], "tool_calls_count": len(tool_calls)},
                "model": model,
                "provider": provider,
                "tokens": {"prompt": per_llm_prompt, "completion": per_llm_completion},
                "cost_usd": round(per_llm_cost, 6),
                "tool_calls": tc_data,
                "available_tools": tool_names,
                "metadata": {
                    "autogen_agent": str(role),
                    "autogen_message_type": "assistant_with_tools",
                    "autogen_conversation_turn": str(i),
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })

        elif msg.get("role") == "tool":
            tool_count += 1
            tool_name = msg.get("name", f"tool_{tool_count}")
            spans.append({
                "node": f"tool_{tool_name}",
                "node_type": "tool",
                "duration_ms": per_message_ms,
                "status": "success",
                "input": {"tool_call_id": msg.get("tool_call_id", "")},
                "output": {"result": content[:MAX_FIELD_SIZE]},
                "available_tools": tool_names,
                "metadata": {
                    "autogen_tool_name": tool_name,
                    "autogen_message_type": "tool_result",
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })

        elif content and role != "user":
            llm_count += 1
            spans.append({
                "node": f"assistant_response_{llm_count}",
                "node_type": "llm",
                "duration_ms": per_message_ms,
                "status": "success",
                "prompt": user_message[:MAX_FIELD_SIZE],
                "system_prompt": system_msg[:MAX_FIELD_SIZE],
                "input": {"conversation_context": f"Turn {i} in AutoGen chat"},
                "output": {"response": content[:MAX_FIELD_SIZE]},
                "model": model,
                "provider": provider,
                "tokens": {"prompt": per_llm_prompt, "completion": per_llm_completion},
                "cost_usd": round(per_llm_cost, 6),
                "metadata": {
                    "autogen_agent": str(role),
                    "autogen_message_type": "assistant_response",
                    "autogen_conversation_turn": str(i),
                    "autogen_version": FRAMEWORK_VERSION,
                },
            })

    return spans