synth-ai 0.2.3__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- synth_ai/compound/cais.py +0 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +115 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +3 -3
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/crafter_custom/environment.py +1 -1
- synth_ai/environments/service/core_routes.py +1 -1
- synth_ai/learning/prompts/mipro.py +8 -0
- synth_ai/lm/core/main_v3.py +219 -158
- synth_ai/tracing_v3/__init__.py +2 -2
- synth_ai/tracing_v3/abstractions.py +62 -17
- synth_ai/tracing_v3/hooks.py +1 -1
- synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
- synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
- synth_ai/tracing_v3/session_tracer.py +5 -5
- synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
- synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
- synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
- synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
- synth_ai/tracing_v3/turso/manager.py +10 -3
- synth_ai/tracing_v3/turso/models.py +1 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +3 -2
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +30 -26
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.3.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/synth_ai/tracing_v3/lm_call_record_abstractions.py
@@ -0,0 +1,257 @@
+"""Unified abstractions for recording LLM API calls (inputs and results).
+
+These records normalize different provider API shapes (Chat Completions,
+Completions, Responses) into a single schema suitable for storage and
+analysis, and are intended to be attached to LMCAISEvent(s) as a list of
+call records.
+
+Integration proposal:
+- Update LMCAISEvent to store `call_records: list[LLMCallRecord]` and remove
+  per-call fields like `model_name`, `provider`, and token counts from the
+  event itself. Those belong on each LLMCallRecord. Aggregates (e.g.,
+  total_tokens across records, cost_usd) can remain on LMCAISEvent and be
+  derived from the records.
+
+Design goals:
+- Capture both input and output payloads in a provider-agnostic way.
+- Preserve provider-specific request params for auditability.
+- Represent tool calls (requested by the model) and tool results distinctly.
+- Support streaming (optionally via `chunks`), but emphasize a final collapsed
+  `LLMCallRecord` for most analytics and fine-tuning data extraction.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Optional, Literal
+from datetime import datetime
+
+
+@dataclass
+class LLMUsage:
+    """Token usage reported by the provider.
+
+    All fields are optional because some providers or stages may omit them.
+    """
+
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    total_tokens: int | None = None
+    # Reasoning/chain-of-thought style token accounting (if provider exposes it)
+    reasoning_tokens: int | None = None
+    reasoning_input_tokens: int | None = None
+    reasoning_output_tokens: int | None = None
+    # Caching/billing/cost
+    cache_write_tokens: int | None = None
+    cache_read_tokens: int | None = None
+    billable_input_tokens: int | None = None
+    billable_output_tokens: int | None = None
+    cost_usd: float | None = None
+
+
+@dataclass
+class LLMRequestParams:
+    """Provider request parameters.
+
+    Store provider-agnostic params explicitly and keep a `raw_params` map for
+    anything provider-specific (top_k, frequency_penalty, etc.).
+    """
+
+    temperature: float | None = None
+    top_p: float | None = None
+    max_tokens: int | None = None
+    stop: list[str] | None = None
+    # Common non-agnostic knobs
+    top_k: int | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
+    repetition_penalty: float | None = None
+    seed: int | None = None
+    n: int | None = None
+    best_of: int | None = None
+    response_format: dict[str, Any] | None = None
+    json_mode: bool | None = None
+    tool_config: dict[str, Any] | None = None
+    raw_params: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMContentPart:
+    """A content item within a message (text, tool-structured JSON, image, etc.)."""
+
+    type: str
+    text: str | None = None
+    # For Responses API or multimodal payloads, keep a generic value
+    data: dict[str, Any] | None = None
+    # Blob reference fields (for image/audio/video)
+    mime_type: str | None = None
+    uri: str | None = None
+    base64_data: str | None = None
+    size_bytes: int | None = None
+    sha256: str | None = None
+    width: int | None = None
+    height: int | None = None
+    duration_ms: int | None = None
+    sample_rate: int | None = None
+    channels: int | None = None
+    language: str | None = None
+
+
+@dataclass
+class LLMMessage:
+    """A message in a chat-style exchange.
+
+    For Completions-style calls, `role="user"` with one text part is typical for input,
+    and `role="assistant"` for output. Responses API can emit multiple parts;
+    use `parts` for generality.
+    """
+
+    role: str  # e.g., system, user, assistant, tool, function, developer
+    parts: list[LLMContentPart] = field(default_factory=list)
+    name: str | None = None
+    tool_call_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ToolCallSpec:
+    """A tool/function call requested by the model (not yet executed)."""
+
+    name: str
+    arguments_json: str  # serialized JSON payload provided by the model
+    arguments: dict[str, Any] | None = None  # parsed convenience
+    call_id: str | None = None  # provider-assigned or synthesized
+    index: int | None = None  # ordinal within a batch
+    parent_call_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ToolCallResult:
+    """The result of executing a tool/function call outside the model.
+
+    This is distinct from the model's own output. Attach execution details for
+    auditability.
+    """
+
+    call_id: str | None = None  # correlate to ToolCallSpec
+    output_text: str | None = None
+    exit_code: int | None = None
+    status: Literal["ok", "error"] | None = None
+    error_message: str | None = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+    duration_ms: int | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMChunk:
+    """Optional streaming chunk representation (for Responses/Chat streaming)."""
+
+    sequence_index: int
+    received_at: datetime
+    event_type: str | None = None  # e.g., content.delta, tool.delta, message.stop
+    choice_index: int | None = None
+    raw_json: str | None = None
+    delta_text: str | None = None
+    delta: dict[str, Any] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMCallRecord:
+    """Normalized record of a single LLM API call.
+
+    Fields capture both the request (input) and the response (output), with
+    optional tool calls and results as emitted by/through the agent runtime.
+    """
+
+    # Identity and classification
+    call_id: str
+    api_type: str  # e.g., "chat_completions", "completions", "responses"
+    provider: str | None = None  # e.g., "openai", "anthropic"
+    model_name: str = ""
+    schema_version: str = "1.0"
+
+    # Timing
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+    latency_ms: int | None = None  # convenience cache (completed - started)
+
+    # Request
+    request_params: LLMRequestParams = field(default_factory=LLMRequestParams)
+    input_messages: list[LLMMessage] = field(default_factory=list)
+    input_text: str | None = None  # for completions-style prompts
+    tool_choice: str | None = None  # e.g., "auto", "none", or a specific tool
+
+    # Response
+    output_messages: list[LLMMessage] = field(default_factory=list)
+    outputs: list[LLMMessage] = field(default_factory=list)  # for n>1 choices
+    output_text: str | None = None  # for completions-style outputs
+    output_tool_calls: list[ToolCallSpec] = field(default_factory=list)
+    usage: LLMUsage | None = None
+    finish_reason: str | None = None
+    choice_index: int | None = None
+
+    # Tool execution results (post-model, optional)
+    tool_results: list[ToolCallResult] = field(default_factory=list)
+
+    # Streaming (optional)
+    chunks: list[LLMChunk] | None = None
+
+    # Raw payloads for audit/debugging
+    request_raw_json: str | None = None
+    response_raw_json: str | None = None
+
+    # Provider- or call-specific extra data (tracing ids, etc.)
+    span_id: str | None = None
+    trace_id: str | None = None
+    provider_request_id: str | None = None
+    request_server_timing: dict[str, Any] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    # Error/outcome
+    outcome: Literal["success", "error", "timeout", "cancelled"] | None = None
+    error: dict[str, Any] | None = None  # {code, message, type, raw}
+    # Logprob traces (optional)
+    token_traces: list[dict[str, Any]] | None = None
+    # Safety/refusal (optional)
+    safety: dict[str, Any] | None = None
+    refusal: dict[str, Any] | None = None
+    # Privacy/redactions
+    redactions: list[dict[str, Any]] | None = None
+
+
+def compute_latency_ms(record: LLMCallRecord) -> Optional[int]:
+    """Compute and update latency_ms from timestamps if available."""
+    if record.started_at and record.completed_at:
+        delta = int((record.completed_at - record.started_at).total_seconds() * 1000)
+        record.latency_ms = delta
+        return delta
+    return record.latency_ms
+
+
+# Provider mapping guidance (summary)
+# -----------------------------------
+# - OpenAI Chat Completions:
+#   - api_type = "chat_completions"
+#   - input_messages from `messages`, output_messages from `choices[].message`
+#   - usage from response.usage
+#   - tool_calls map to ToolCallSpec (choices[].message.tool_calls)
+#
+# - OpenAI Completions:
+#   - api_type = "completions"
+#   - input_text from `prompt`, output_text from `choices[].text`
+#   - usage from response.usage
+#
+# - OpenAI Responses API (streamed):
+#   - api_type = "responses"
+#   - input_messages from `input[]` or `messages[]` per content type
+#   - output_messages from streamed `message` nodes; usage from terminal chunk
+#   - chunks hold raw SSE segments if desired
+#   - tool_calls from streamed `function_call`/`tool_call` nodes
+#
+# Tool execution results should be attached as ToolCallResult entries when the
+# agent runtime executes the requested tool(s) and has ground-truth outputs.
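The hunk above adds the schema but does not show how records get populated; the companion `llm_call_record_helpers.py` (also new in this release, +350 lines) presumably carries that logic, but its contents are not shown here. As a rough, hypothetical sketch only (the function name `record_from_chat_completion` and the exact field mapping are illustrative assumptions, not the shipped helper API), a Chat Completions request/response pair could be normalized onto the new dataclasses like this:

```python
# Hypothetical sketch: normalize an OpenAI Chat Completions request/response pair
# into the LLMCallRecord schema introduced in this release.
from datetime import datetime, timezone

from synth_ai.tracing_v3.lm_call_record_abstractions import (
    LLMCallRecord,
    LLMContentPart,
    LLMMessage,
    LLMRequestParams,
    LLMUsage,
    ToolCallSpec,
    compute_latency_ms,
)


def record_from_chat_completion(request: dict, response: dict) -> LLMCallRecord:
    """Map a Chat Completions call onto the normalized record (illustrative only)."""
    choice = response["choices"][0]
    message = choice["message"]
    usage = response.get("usage", {})

    record = LLMCallRecord(
        call_id=response.get("id", ""),
        api_type="chat_completions",
        provider="openai",
        model_name=response.get("model", request.get("model", "")),
        # In real use these timestamps would be captured around the API call.
        started_at=datetime.now(timezone.utc),
        completed_at=datetime.now(timezone.utc),
        request_params=LLMRequestParams(
            temperature=request.get("temperature"),
            top_p=request.get("top_p"),
            max_tokens=request.get("max_tokens"),
            # Keep everything else for auditability, per the raw_params convention.
            raw_params={k: v for k, v in request.items() if k not in {"messages", "model"}},
        ),
        input_messages=[
            LLMMessage(role=m["role"], parts=[LLMContentPart(type="text", text=m.get("content"))])
            for m in request.get("messages", [])
        ],
        output_messages=[
            LLMMessage(role=message["role"], parts=[LLMContentPart(type="text", text=message.get("content"))])
        ],
        output_tool_calls=[
            ToolCallSpec(
                name=tc["function"]["name"],
                arguments_json=tc["function"]["arguments"],
                call_id=tc.get("id"),
                index=i,
            )
            for i, tc in enumerate(message.get("tool_calls") or [])
        ],
        usage=LLMUsage(
            input_tokens=usage.get("prompt_tokens"),
            output_tokens=usage.get("completion_tokens"),
            total_tokens=usage.get("total_tokens"),
        ),
        finish_reason=choice.get("finish_reason"),
        outcome="success",
    )
    compute_latency_ms(record)  # fills latency_ms from the timestamps
    return record
```

Per the integration proposal in the module docstring, aggregates such as total token usage or cost would then be derived on the owning LMCAISEvent from its list of call records rather than stored per event.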
--- a/synth_ai/tracing_v3/session_tracer.py
+++ b/synth_ai/tracing_v3/session_tracer.py
@@ -5,7 +5,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Any, Union
 from contextlib import asynccontextmanager
 
-from .abstractions import SessionTrace, SessionTimeStep, BaseEvent,
+from .abstractions import SessionTrace, SessionTimeStep, BaseEvent, SessionEventMarkovBlanketMessage, TimeRecord
 from .decorators import set_session_id, set_turn_number, set_session_tracer, SessionContext
 from .turso.manager import AsyncSQLTraceManager
 from .config import CONFIG
@@ -93,7 +93,7 @@ class SessionTracer:
             created_at=datetime.utcnow(),
             session_time_steps=[],
             event_history=[],
-
+            markov_blanket_message_history=[],
             metadata=metadata or {},
         )
 
@@ -215,7 +215,7 @@ class SessionTracer:
         if self._current_trace is None:
            raise RuntimeError("No active session")
 
-        msg =
+        msg = SessionEventMarkovBlanketMessage(
            content=content,
            message_type=message_type,
            time_record=TimeRecord(
@@ -232,9 +232,9 @@ class SessionTracer:
         await self.hooks.trigger("message_recorded", message=msg)
 
         # Add to histories
-        self._current_trace.
+        self._current_trace.markov_blanket_message_history.append(msg)
         if self._current_step:
-            self._current_step.
+            self._current_step.markov_blanket_messages.append(msg)
 
     async def end_session(self, save: bool = None) -> SessionTrace:
         """End the current session.
--- a/synth_ai/tracing_v3/tests/test_session_tracer.py
+++ b/synth_ai/tracing_v3/tests/test_session_tracer.py
@@ -20,7 +20,7 @@ from typing import List
 from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
 from synth_ai.tracing_v3.session_tracer import SessionTracer
 from synth_ai.tracing_v3.abstractions import (
-
+    SessionEventMarkovBlanketMessage,
     TimeRecord,
     RuntimeEvent,
     EnvironmentEvent,