synth-ai 0.2.3__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/compound/cais.py +0 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +115 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/crafter_custom/environment.py +1 -1
- synth_ai/environments/service/core_routes.py +1 -1
- synth_ai/learning/prompts/mipro.py +8 -0
- synth_ai/lm/core/main_v3.py +219 -158
- synth_ai/tracing_v3/__init__.py +2 -2
- synth_ai/tracing_v3/abstractions.py +62 -17
- synth_ai/tracing_v3/hooks.py +1 -1
- synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
- synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
- synth_ai/tracing_v3/session_tracer.py +5 -5
- synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
- synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
- synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
- synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
- synth_ai/tracing_v3/turso/manager.py +10 -3
- synth_ai/tracing_v3/turso/models.py +1 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +3 -2
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +30 -26
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
@@ -18,13 +18,27 @@ Session Structure:
 - SessionTrace: Top-level container for a complete session
 - SessionTimeStep: Logical steps within a session (e.g., conversation turns)
 - Events: Individual events that occurred during the timestep
-- Messages:
+- Messages: Information passed between subsystems (user, agent, runtime, environments)
+
+Concepts:
+---------
+- Events capture something that happened inside a subsystem. They may or may not be externally
+  visible. Examples include an LLM API call (LMCAISEvent), a tool selection (RuntimeEvent), or
+  a tool execution outcome (EnvironmentEvent).
+
+- Messages represent information transmitted between subsystems within the session.
+  Messages are used to record communications like: a user sending input to the agent,
+  the agent/runtime sending a tool invocation to an environment, the environment sending a
+  tool result back, and the agent sending a reply to the user. Do not confuse these with
+  provider-specific LLM API "messages" (prompt formatting) — those belong inside an LMCAISEvent
+  as part of its input/output content, not as SessionEventMessages.
 """
 
 from __future__ import annotations
 from dataclasses import dataclass, field, asdict
 from datetime import datetime
 from typing import Any, Dict, List, Optional
+from .lm_call_record_abstractions import LLMCallRecord
 
 
 @dataclass
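To make the Events/Messages distinction concrete, a minimal sketch follows. Only the fields documented in this diff (content, message_type, time_record, metadata, system_instance_id, model_name) come from the source; the TimeRecord constructor arguments are assumptions and may differ from the actual dataclass.

    import time
    from synth_ai.tracing_v3.abstractions import (
        LMCAISEvent,
        SessionEventMarkovBlanketMessage,
        TimeRecord,
    )

    # Event: an intra-system fact (an LLM call made inside the agent subsystem).
    llm_event = LMCAISEvent(
        system_instance_id="agent-0",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        model_name="gpt-4",
    )

    # Message: information crossing the boundary from the user to the agent.
    user_msg = SessionEventMarkovBlanketMessage(
        content="Collect wood, then craft a pickaxe.",
        message_type="observation",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        metadata={"from_system_role": "human", "to_system_role": "agent"},
    )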
@@ -46,18 +60,39 @@ class TimeRecord:
 
 
 @dataclass
-class
-"""Message
+class SessionEventMarkovBlanketMessage:
+    """Message crossing Markov blanket boundaries between systems in a session.
+
+    IMPORTANT: This represents information transfer BETWEEN distinct systems/subsystems,
+    where each system is conceptualized as having a Markov blanket that separates its
+    internal states from the external environment. These messages cross those boundaries.
+
+    This is NOT for chat messages within an LLM conversation (those belong in LLMCallRecord).
+    Instead, this captures inter-system communication such as:
+    - Human -> Agent system (user providing instructions)
+    - Agent -> Runtime (agent deciding on an action)
+    - Runtime -> Environment (executing a tool/action)
+    - Environment -> Runtime (returning results)
+    - Runtime -> Agent (passing back results)
+    - Agent -> Human (final response)
 
-
-
+    Each system maintains its own internal state and processing, but can only influence
+    other systems through these explicit boundary-crossing messages. This follows the
+    Free Energy Principle where systems minimize surprise by maintaining boundaries.
 
     Attributes:
-        content: The actual message content (text, JSON, etc.)
-        message_type: Type
-        time_record: Timing information for the
-        metadata:
-
+        content: The actual message content crossing the boundary (text, JSON, etc.)
+        message_type: Type of boundary crossing (e.g., 'observation', 'action', 'result')
+        time_record: Timing information for the boundary crossing
+        metadata: Boundary crossing metadata. Recommended keys:
+            - 'step_id': Timestep identifier
+            - 'from_system_instance_id': UUID of the sending system
+            - 'to_system_instance_id': UUID of the receiving system
+            - 'from_system_role': Role of sender (e.g., 'human', 'agent', 'runtime', 'environment')
+            - 'to_system_role': Role of receiver
+            - 'boundary_type': Type of Markov blanket boundary being crossed
+            - 'call_id': Correlate request/response pairs across boundaries
+            - 'causal_influence': Direction of causal flow
     """
 
     content: str
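A sketch of a fully annotated boundary crossing using the recommended metadata keys from the docstring above (the TimeRecord constructor is an assumption; the UUIDs and the 'boundary_type'/'causal_influence' values are placeholders):

    import time
    import uuid
    from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, TimeRecord

    tool_invocation = SessionEventMarkovBlanketMessage(
        content='{"tool": "interact", "args": {"direction": "north"}}',
        message_type="action",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        metadata={
            "step_id": "turn_3",
            "from_system_instance_id": str(uuid.uuid4()),
            "to_system_instance_id": str(uuid.uuid4()),
            "from_system_role": "runtime",
            "to_system_role": "environment",
            "boundary_type": "runtime_to_environment",   # placeholder value
            "call_id": "call_0001",
            "causal_influence": "runtime->environment",  # placeholder value
        },
    )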
@@ -70,8 +105,9 @@ class SessionEventMessage:
 class BaseEvent:
     """Base class for all event types.
 
-    This is the foundation for all events in the tracing system. Every event
-
+    This is the foundation for all events in the tracing system. Every event must
+    have a system identifier and timing information. Events are intra-system facts
+    (they occur within a subsystem) and are not necessarily direct communications.
 
     Attributes:
         system_instance_id: Identifier for the system/component that generated
@@ -95,8 +131,10 @@ class BaseEvent:
 class RuntimeEvent(BaseEvent):
     """Event from runtime system.
 
-    Captures events from the AI system's runtime, typically representing
-
+    Captures events from the AI system's runtime, typically representing decisions
+    or actions taken by the system (e.g., selecting a tool with arguments).
+    Use paired SessionEventMessages to record the communication of this choice to
+    the environment.
 
     Attributes:
         actions: List of action identifiers or indices. The interpretation
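As the docstring suggests, a tool selection is recorded as a RuntimeEvent and its transmission to the environment as a separate boundary message. A sketch (the actions encoding and the TimeRecord constructor are assumptions):

    import time
    from synth_ai.tracing_v3.abstractions import (
        RuntimeEvent,
        SessionEventMarkovBlanketMessage,
        TimeRecord,
    )

    # Intra-system fact: the runtime chose action index 4.
    decision = RuntimeEvent(
        system_instance_id="runtime-0",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        actions=[4],
    )

    # Boundary crossing: the same choice sent to the environment.
    outgoing = SessionEventMarkovBlanketMessage(
        content='{"action_index": 4}',
        message_type="action",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        metadata={"from_system_role": "runtime", "to_system_role": "environment"},
    )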
@@ -111,7 +149,9 @@ class RuntimeEvent(BaseEvent):
 class EnvironmentEvent(BaseEvent):
     """Event from environment.
 
-    Captures feedback from the environment in response to system actions.
+    Captures feedback from the environment in response to system actions (e.g.,
+    command output, exit codes, observations). Use a paired SessionEventMessage
+    to record the environment-to-agent communication of the result.
     Follows the Gymnasium/OpenAI Gym convention for compatibility.
 
     Attributes:
@@ -135,6 +175,8 @@ class LMCAISEvent(BaseEvent):
 
     CAIS (Claude AI System) events capture detailed information about LLM calls,
     including performance metrics, cost tracking, and distributed tracing support.
+    Treat provider-specific prompt/completion structures as part of this event's
+    data. Do not emit them as SessionEventMessages.
 
     Attributes:
         model_name: The specific model used (e.g., 'gpt-4', 'claude-3-opus')
@@ -148,6 +190,8 @@ class LMCAISEvent(BaseEvent):
         trace_id: OpenTelemetry compatible trace identifier
         system_state_before: State snapshot before the LLM call
         system_state_after: State snapshot after the LLM call
+        call_records: List of normalized LLM call records capturing request/response
+            details (messages, tool calls/results, usage, params, etc.).
     """
 
     model_name: str = ""
@@ -161,6 +205,7 @@ class LMCAISEvent(BaseEvent):
     trace_id: Optional[str] = None
     system_state_before: Optional[Dict[str, Any]] = None
     system_state_after: Optional[Dict[str, Any]] = None
+    call_records: List[LLMCallRecord] = field(default_factory=list)
 
 
 @dataclass
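A sketch of populating the new call_records field with the helper added in this diff (the `response` object is assumed to be a BaseLMResponse produced by the vendor layer; the TimeRecord constructor is an assumption):

    import time
    from synth_ai.tracing_v3.abstractions import LMCAISEvent, TimeRecord
    from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response

    record = create_llm_call_record_from_response(
        response=response,  # BaseLMResponse from the vendor client (assumed to exist)
        model_name="gpt-4",
        provider="openai",
        messages=[{"role": "user", "content": "Hello"}],
    )

    event = LMCAISEvent(
        system_instance_id="agent-0",
        time_record=TimeRecord(event_time=time.time()),  # assumed constructor
        model_name="gpt-4",
        call_records=[record],
    )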
@@ -188,7 +233,7 @@ class SessionTimeStep:
     timestamp: datetime = field(default_factory=datetime.utcnow)
     turn_number: Optional[int] = None
     events: List[BaseEvent] = field(default_factory=list)
-
+    markov_blanket_messages: List[SessionEventMarkovBlanketMessage] = field(default_factory=list)
     step_metadata: Dict[str, Any] = field(default_factory=dict)
     completed_at: Optional[datetime] = None
 
@@ -222,7 +267,7 @@ class SessionTrace:
     created_at: datetime = field(default_factory=datetime.utcnow)
     session_time_steps: List[SessionTimeStep] = field(default_factory=list)
     event_history: List[BaseEvent] = field(default_factory=list)
-
+    markov_blanket_message_history: List[SessionEventMarkovBlanketMessage] = field(default_factory=list)
     metadata: Dict[str, Any] = field(default_factory=dict)
     session_metadata: Optional[List[Dict[str, Any]]] = None
 
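The renamed collections compose as sketched below; `step` and `trace` are assumed to be existing SessionTimeStep and SessionTrace instances, and `decision`/`outgoing` reuse the objects from the RuntimeEvent sketch above:

    # Per-step bookkeeping (fields shown in this diff).
    step.events.append(decision)
    step.markov_blanket_messages.append(outgoing)

    # Session-level history mirrors the per-step collections.
    trace.session_time_steps.append(step)
    trace.event_history.extend(step.events)
    trace.markov_blanket_message_history.extend(step.markov_blanket_messages)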
synth_ai/tracing_v3/hooks.py CHANGED
@@ -37,7 +37,7 @@ from dataclasses import dataclass
 import asyncio
 import inspect
 
-from .abstractions import SessionTrace, SessionTimeStep, BaseEvent,
+from .abstractions import SessionTrace, SessionTimeStep, BaseEvent, SessionEventMarkovBlanketMessage
 
 
 @dataclass
synth_ai/tracing_v3/llm_call_record_helpers.py ADDED
@@ -0,0 +1,350 @@
+"""Helper functions for creating and populating LLMCallRecord instances.
+
+This module provides utilities to convert vendor responses to LLMCallRecord
+format and compute aggregates from call records.
+"""
+
+import uuid
+import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Union
+
+from synth_ai.tracing_v3.lm_call_record_abstractions import (
+    LLMCallRecord,
+    LLMUsage,
+    LLMRequestParams,
+    LLMMessage,
+    LLMContentPart,
+    ToolCallSpec,
+    ToolCallResult,
+    LLMChunk,
+)
+from synth_ai.lm.vendors.base import BaseLMResponse
+
+
+def create_llm_call_record_from_response(
+    response: BaseLMResponse,
+    model_name: str,
+    provider: str,
+    messages: List[Dict[str, Any]],
+    temperature: float = 0.8,
+    request_params: Optional[Dict[str, Any]] = None,
+    tools: Optional[List] = None,
+    started_at: Optional[datetime] = None,
+    completed_at: Optional[datetime] = None,
+    latency_ms: Optional[int] = None,
+) -> LLMCallRecord:
+    """Create an LLMCallRecord from a vendor response.
+
+    Args:
+        response: The vendor response object
+        model_name: Name of the model used
+        provider: Provider name (e.g., 'openai', 'anthropic')
+        messages: Input messages sent to the model
+        temperature: Temperature parameter used
+        request_params: Additional request parameters
+        tools: Tools provided to the model
+        started_at: When the request started
+        completed_at: When the request completed
+        latency_ms: End-to-end latency in milliseconds
+
+    Returns:
+        A populated LLMCallRecord instance
+    """
+    # Generate call ID
+    call_id = str(uuid.uuid4())
+
+    # Determine API type from response
+    api_type = "chat_completions"  # Default
+    if hasattr(response, 'api_type'):
+        if response.api_type == "responses":
+            api_type = "responses"
+        elif response.api_type == "completions":
+            api_type = "completions"
+
+    # Convert input messages to LLMMessage format
+    input_messages = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+
+        # Handle different content formats
+        if isinstance(content, str):
+            parts = [LLMContentPart(type="text", text=content)]
+        elif isinstance(content, list):
+            parts = []
+            for item in content:
+                if isinstance(item, dict):
+                    if item.get("type") == "text":
+                        parts.append(LLMContentPart(type="text", text=item.get("text", "")))
+                    elif item.get("type") == "image_url":
+                        parts.append(LLMContentPart(
+                            type="image",
+                            uri=item.get("image_url", {}).get("url", ""),
+                            mime_type="image/jpeg"
+                        ))
+                    elif item.get("type") == "image":
+                        parts.append(LLMContentPart(
+                            type="image",
+                            data=item.get("source", {}),
+                            mime_type=item.get("source", {}).get("media_type", "image/jpeg")
+                        ))
+                else:
+                    parts.append(LLMContentPart(type="text", text=str(item)))
+        else:
+            parts = [LLMContentPart(type="text", text=str(content))]
+
+        input_messages.append(LLMMessage(role=role, parts=parts))
+
+    # Extract output messages from response
+    output_messages = []
+    output_text = None
+
+    if hasattr(response, 'raw_response'):
+        # Extract assistant message
+        output_text = response.raw_response
+        output_messages.append(
+            LLMMessage(
+                role="assistant",
+                parts=[LLMContentPart(type="text", text=output_text)]
+            )
+        )
+
+    # Extract tool calls if present
+    output_tool_calls = []
+    if hasattr(response, 'tool_calls') and response.tool_calls:
+        for idx, tool_call in enumerate(response.tool_calls):
+            if isinstance(tool_call, dict):
+                output_tool_calls.append(
+                    ToolCallSpec(
+                        name=tool_call.get("function", {}).get("name", ""),
+                        arguments_json=tool_call.get("function", {}).get("arguments", "{}"),
+                        call_id=tool_call.get("id", f"tool_{idx}"),
+                        index=idx
+                    )
+                )
+
+    # Extract usage information
+    usage = None
+    if hasattr(response, 'usage') and response.usage:
+        usage = LLMUsage(
+            input_tokens=response.usage.get("input_tokens"),
+            output_tokens=response.usage.get("output_tokens"),
+            total_tokens=response.usage.get("total_tokens"),
+            cost_usd=response.usage.get("cost_usd"),
+            # Additional token accounting if available
+            reasoning_tokens=response.usage.get("reasoning_tokens"),
+            reasoning_input_tokens=response.usage.get("reasoning_input_tokens"),
+            reasoning_output_tokens=response.usage.get("reasoning_output_tokens"),
+            cache_write_tokens=response.usage.get("cache_write_tokens"),
+            cache_read_tokens=response.usage.get("cache_read_tokens"),
+        )
+
+    # Build request parameters
+    params = LLMRequestParams(
+        temperature=temperature,
+        top_p=request_params.get("top_p") if request_params else None,
+        max_tokens=request_params.get("max_tokens") if request_params else None,
+        stop=request_params.get("stop") if request_params else None,
+        raw_params=request_params or {}
+    )
+
+    # Handle response-specific fields
+    finish_reason = None
+    if hasattr(response, 'finish_reason'):
+        finish_reason = response.finish_reason
+    elif hasattr(response, 'stop_reason'):
+        finish_reason = response.stop_reason
+
+    # Create the call record
+    record = LLMCallRecord(
+        call_id=call_id,
+        api_type=api_type,
+        provider=provider,
+        model_name=model_name,
+        started_at=started_at or datetime.utcnow(),
+        completed_at=completed_at or datetime.utcnow(),
+        latency_ms=latency_ms,
+        request_params=params,
+        input_messages=input_messages,
+        input_text=None,  # For completions API
+        tool_choice="auto" if tools else None,
+        output_messages=output_messages,
+        output_text=output_text,
+        output_tool_calls=output_tool_calls,
+        usage=usage,
+        finish_reason=finish_reason,
+        outcome="success",
+        metadata={
+            "has_tools": tools is not None,
+            "num_tools": len(tools) if tools else 0,
+        }
+    )
+
+    # Store response ID if available (for Responses API)
+    if hasattr(response, 'response_id') and response.response_id:
+        record.metadata["response_id"] = response.response_id
+        record.provider_request_id = response.response_id
+
+    return record
+
+
+def compute_aggregates_from_call_records(call_records: List[LLMCallRecord]) -> Dict[str, Any]:
+    """Compute aggregate statistics from a list of LLMCallRecord instances.
+
+    Args:
+        call_records: List of LLMCallRecord instances
+
+    Returns:
+        Dictionary containing aggregated statistics
+    """
+    aggregates = {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "total_tokens": 0,
+        "reasoning_tokens": 0,
+        "cost_usd": 0.0,
+        "latency_ms": 0,
+        "models_used": set(),
+        "providers_used": set(),
+        "tool_calls_count": 0,
+        "error_count": 0,
+        "success_count": 0,
+        "call_count": len(call_records)
+    }
+
+    for record in call_records:
+        # Token aggregation
+        if record.usage:
+            if record.usage.input_tokens:
+                aggregates["input_tokens"] += record.usage.input_tokens
+            if record.usage.output_tokens:
+                aggregates["output_tokens"] += record.usage.output_tokens
+            if record.usage.total_tokens:
+                aggregates["total_tokens"] += record.usage.total_tokens
+            if record.usage.reasoning_tokens:
+                aggregates["reasoning_tokens"] += record.usage.reasoning_tokens
+            if record.usage.cost_usd:
+                aggregates["cost_usd"] += record.usage.cost_usd
+
+        # Latency aggregation
+        if record.latency_ms:
+            aggregates["latency_ms"] += record.latency_ms
+
+        # Model and provider tracking
+        if record.model_name:
+            aggregates["models_used"].add(record.model_name)
+        if record.provider:
+            aggregates["providers_used"].add(record.provider)
+
+        # Tool calls
+        aggregates["tool_calls_count"] += len(record.output_tool_calls)
+
+        # Success/error tracking
+        if record.outcome == "error":
+            aggregates["error_count"] += 1
+        elif record.outcome == "success":
+            aggregates["success_count"] += 1
+
+    # Convert sets to lists for JSON serialization
+    aggregates["models_used"] = list(aggregates["models_used"])
+    aggregates["providers_used"] = list(aggregates["providers_used"])
+
+    # Compute averages
+    if aggregates["call_count"] > 0:
+        aggregates["avg_latency_ms"] = aggregates["latency_ms"] / aggregates["call_count"]
+        aggregates["avg_input_tokens"] = aggregates["input_tokens"] / aggregates["call_count"]
+        aggregates["avg_output_tokens"] = aggregates["output_tokens"] / aggregates["call_count"]
+
+    return aggregates
+
+
+def create_llm_call_record_from_streaming(
+    chunks: List[LLMChunk],
+    model_name: str,
+    provider: str,
+    messages: List[Dict[str, Any]],
+    temperature: float = 0.8,
+    request_params: Optional[Dict[str, Any]] = None,
+    started_at: Optional[datetime] = None,
+    completed_at: Optional[datetime] = None,
+) -> LLMCallRecord:
+    """Create an LLMCallRecord from streaming chunks.
+
+    This function reconstructs a complete LLMCallRecord from streaming
+    response chunks, useful for Responses API or streaming Chat Completions.
+
+    Args:
+        chunks: List of LLMChunk instances from streaming
+        model_name: Name of the model used
+        provider: Provider name
+        messages: Input messages sent to the model
+        temperature: Temperature parameter used
+        request_params: Additional request parameters
+        started_at: When the request started
+        completed_at: When the request completed
+
+    Returns:
+        A populated LLMCallRecord instance
+    """
+    # Reconstruct output text from chunks
+    output_text = "".join(
+        chunk.delta_text for chunk in chunks
+        if chunk.delta_text
+    )
+
+    # Calculate latency from chunk timestamps
+    latency_ms = None
+    if chunks and started_at:
+        last_chunk_time = chunks[-1].received_at
+        latency_ms = int((last_chunk_time - started_at).total_seconds() * 1000)
+
+    # Convert input messages
+    input_messages = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+
+        if isinstance(content, str):
+            parts = [LLMContentPart(type="text", text=content)]
+        else:
+            parts = [LLMContentPart(type="text", text=str(content))]
+
+        input_messages.append(LLMMessage(role=role, parts=parts))
+
+    # Create output message
+    output_messages = [
+        LLMMessage(
+            role="assistant",
+            parts=[LLMContentPart(type="text", text=output_text)]
+        )
+    ]
+
+    # Build request parameters
+    params = LLMRequestParams(
+        temperature=temperature,
+        raw_params=request_params or {}
+    )
+
+    # Create the call record
+    record = LLMCallRecord(
+        call_id=str(uuid.uuid4()),
+        api_type="responses",  # Streaming typically from Responses API
+        provider=provider,
+        model_name=model_name,
+        started_at=started_at or datetime.utcnow(),
+        completed_at=completed_at or datetime.utcnow(),
+        latency_ms=latency_ms,
+        request_params=params,
+        input_messages=input_messages,
+        output_messages=output_messages,
+        output_text=output_text,
+        chunks=chunks,
+        outcome="success",
+        metadata={
+            "chunk_count": len(chunks),
+            "streaming": True
+        }
+    )
+
+    return record