synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. synth_ai/cli/__init__.py +66 -0
  2. synth_ai/cli/balance.py +205 -0
  3. synth_ai/cli/calc.py +70 -0
  4. synth_ai/cli/demo.py +74 -0
  5. synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
  6. synth_ai/cli/man.py +103 -0
  7. synth_ai/cli/recent.py +126 -0
  8. synth_ai/cli/root.py +184 -0
  9. synth_ai/cli/status.py +126 -0
  10. synth_ai/cli/traces.py +136 -0
  11. synth_ai/cli/watch.py +508 -0
  12. synth_ai/config/base_url.py +53 -0
  13. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  14. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  15. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
  16. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
  25. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
  26. synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
  27. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
  28. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  29. synth_ai/environments/examples/crafter_custom/environment.py +1 -1
  30. synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
  31. synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
  32. synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
  33. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
  34. synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
  35. synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
  36. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
  37. synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
  38. synth_ai/environments/service/core_routes.py +1 -1
  39. synth_ai/experimental/synth_oss.py +446 -0
  40. synth_ai/learning/core.py +21 -0
  41. synth_ai/learning/gateway.py +4 -0
  42. synth_ai/learning/prompts/gepa.py +0 -0
  43. synth_ai/learning/prompts/mipro.py +8 -0
  44. synth_ai/lm/__init__.py +3 -0
  45. synth_ai/lm/core/main.py +4 -0
  46. synth_ai/lm/core/main_v3.py +238 -122
  47. synth_ai/lm/core/vendor_clients.py +4 -0
  48. synth_ai/lm/provider_support/openai.py +11 -2
  49. synth_ai/lm/vendors/base.py +7 -0
  50. synth_ai/lm/vendors/openai_standard.py +339 -4
  51. synth_ai/lm/vendors/openai_standard_responses.py +243 -0
  52. synth_ai/lm/vendors/synth_client.py +155 -5
  53. synth_ai/lm/warmup.py +54 -17
  54. synth_ai/tracing/__init__.py +18 -0
  55. synth_ai/tracing_v1/__init__.py +29 -14
  56. synth_ai/tracing_v3/__init__.py +2 -2
  57. synth_ai/tracing_v3/abstractions.py +62 -17
  58. synth_ai/tracing_v3/config.py +13 -7
  59. synth_ai/tracing_v3/db_config.py +6 -6
  60. synth_ai/tracing_v3/hooks.py +1 -1
  61. synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
  62. synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
  63. synth_ai/tracing_v3/session_tracer.py +5 -5
  64. synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
  65. synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
  66. synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
  67. synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
  68. synth_ai/tracing_v3/turso/manager.py +18 -11
  69. synth_ai/tracing_v3/turso/models.py +1 -0
  70. synth_ai/tui/__main__.py +13 -0
  71. synth_ai/tui/dashboard.py +329 -0
  72. synth_ai/v0/tracing/__init__.py +0 -0
  73. synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
  74. synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
  75. synth_ai/{tracing → v0/tracing}/context.py +1 -1
  76. synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
  77. synth_ai/v0/tracing/events/__init__.py +0 -0
  78. synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
  79. synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
  80. synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
  81. synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
  82. synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
  83. synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
  84. synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
  85. synth_ai/{tracing → v0/tracing}/upload.py +4 -4
  86. synth_ai/v0/tracing_v1/__init__.py +16 -0
  87. synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
  88. synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
  89. synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
  90. synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
  91. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  92. synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
  93. synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
  94. synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
  95. synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
  96. synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
  97. synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
  98. synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
  99. synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
  100. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
  101. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
  102. /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
  103. /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
  104. /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
  105. /synth_ai/{tracing → v0/tracing}/config.py +0 -0
  106. /synth_ai/{tracing → v0/tracing}/local.py +0 -0
  107. /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
  108. /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
  109. /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
  110. /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
  111. /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
  112. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
  113. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  114. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  115. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
synth_ai/tracing_v3/llm_call_record_helpers.py
@@ -0,0 +1,350 @@
+"""Helper functions for creating and populating LLMCallRecord instances.
+
+This module provides utilities to convert vendor responses to LLMCallRecord
+format and compute aggregates from call records.
+"""
+
+import uuid
+import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Union
+
+from synth_ai.tracing_v3.lm_call_record_abstractions import (
+    LLMCallRecord,
+    LLMUsage,
+    LLMRequestParams,
+    LLMMessage,
+    LLMContentPart,
+    ToolCallSpec,
+    ToolCallResult,
+    LLMChunk,
+)
+from synth_ai.lm.vendors.base import BaseLMResponse
+
+
+def create_llm_call_record_from_response(
+    response: BaseLMResponse,
+    model_name: str,
+    provider: str,
+    messages: List[Dict[str, Any]],
+    temperature: float = 0.8,
+    request_params: Optional[Dict[str, Any]] = None,
+    tools: Optional[List] = None,
+    started_at: Optional[datetime] = None,
+    completed_at: Optional[datetime] = None,
+    latency_ms: Optional[int] = None,
+) -> LLMCallRecord:
+    """Create an LLMCallRecord from a vendor response.
+
+    Args:
+        response: The vendor response object
+        model_name: Name of the model used
+        provider: Provider name (e.g., 'openai', 'anthropic')
+        messages: Input messages sent to the model
+        temperature: Temperature parameter used
+        request_params: Additional request parameters
+        tools: Tools provided to the model
+        started_at: When the request started
+        completed_at: When the request completed
+        latency_ms: End-to-end latency in milliseconds
+
+    Returns:
+        A populated LLMCallRecord instance
+    """
+    # Generate call ID
+    call_id = str(uuid.uuid4())
+
+    # Determine API type from response
+    api_type = "chat_completions"  # Default
+    if hasattr(response, 'api_type'):
+        if response.api_type == "responses":
+            api_type = "responses"
+        elif response.api_type == "completions":
+            api_type = "completions"
+
+    # Convert input messages to LLMMessage format
+    input_messages = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+
+        # Handle different content formats
+        if isinstance(content, str):
+            parts = [LLMContentPart(type="text", text=content)]
+        elif isinstance(content, list):
+            parts = []
+            for item in content:
+                if isinstance(item, dict):
+                    if item.get("type") == "text":
+                        parts.append(LLMContentPart(type="text", text=item.get("text", "")))
+                    elif item.get("type") == "image_url":
+                        parts.append(LLMContentPart(
+                            type="image",
+                            uri=item.get("image_url", {}).get("url", ""),
+                            mime_type="image/jpeg"
+                        ))
+                    elif item.get("type") == "image":
+                        parts.append(LLMContentPart(
+                            type="image",
+                            data=item.get("source", {}),
+                            mime_type=item.get("source", {}).get("media_type", "image/jpeg")
+                        ))
+                else:
+                    parts.append(LLMContentPart(type="text", text=str(item)))
+        else:
+            parts = [LLMContentPart(type="text", text=str(content))]
+
+        input_messages.append(LLMMessage(role=role, parts=parts))
+
+    # Extract output messages from response
+    output_messages = []
+    output_text = None
+
+    if hasattr(response, 'raw_response'):
+        # Extract assistant message
+        output_text = response.raw_response
+        output_messages.append(
+            LLMMessage(
+                role="assistant",
+                parts=[LLMContentPart(type="text", text=output_text)]
+            )
+        )
+
+    # Extract tool calls if present
+    output_tool_calls = []
+    if hasattr(response, 'tool_calls') and response.tool_calls:
+        for idx, tool_call in enumerate(response.tool_calls):
+            if isinstance(tool_call, dict):
+                output_tool_calls.append(
+                    ToolCallSpec(
+                        name=tool_call.get("function", {}).get("name", ""),
+                        arguments_json=tool_call.get("function", {}).get("arguments", "{}"),
+                        call_id=tool_call.get("id", f"tool_{idx}"),
+                        index=idx
+                    )
+                )
+
+    # Extract usage information
+    usage = None
+    if hasattr(response, 'usage') and response.usage:
+        usage = LLMUsage(
+            input_tokens=response.usage.get("input_tokens"),
+            output_tokens=response.usage.get("output_tokens"),
+            total_tokens=response.usage.get("total_tokens"),
+            cost_usd=response.usage.get("cost_usd"),
+            # Additional token accounting if available
+            reasoning_tokens=response.usage.get("reasoning_tokens"),
+            reasoning_input_tokens=response.usage.get("reasoning_input_tokens"),
+            reasoning_output_tokens=response.usage.get("reasoning_output_tokens"),
+            cache_write_tokens=response.usage.get("cache_write_tokens"),
+            cache_read_tokens=response.usage.get("cache_read_tokens"),
+        )
+
+    # Build request parameters
+    params = LLMRequestParams(
+        temperature=temperature,
+        top_p=request_params.get("top_p") if request_params else None,
+        max_tokens=request_params.get("max_tokens") if request_params else None,
+        stop=request_params.get("stop") if request_params else None,
+        raw_params=request_params or {}
+    )
+
+    # Handle response-specific fields
+    finish_reason = None
+    if hasattr(response, 'finish_reason'):
+        finish_reason = response.finish_reason
+    elif hasattr(response, 'stop_reason'):
+        finish_reason = response.stop_reason
+
+    # Create the call record
+    record = LLMCallRecord(
+        call_id=call_id,
+        api_type=api_type,
+        provider=provider,
+        model_name=model_name,
+        started_at=started_at or datetime.utcnow(),
+        completed_at=completed_at or datetime.utcnow(),
+        latency_ms=latency_ms,
+        request_params=params,
+        input_messages=input_messages,
+        input_text=None,  # For completions API
+        tool_choice="auto" if tools else None,
+        output_messages=output_messages,
+        output_text=output_text,
+        output_tool_calls=output_tool_calls,
+        usage=usage,
+        finish_reason=finish_reason,
+        outcome="success",
+        metadata={
+            "has_tools": tools is not None,
+            "num_tools": len(tools) if tools else 0,
+        }
+    )
+
+    # Store response ID if available (for Responses API)
+    if hasattr(response, 'response_id') and response.response_id:
+        record.metadata["response_id"] = response.response_id
+        record.provider_request_id = response.response_id
+
+    return record
+
+
+def compute_aggregates_from_call_records(call_records: List[LLMCallRecord]) -> Dict[str, Any]:
+    """Compute aggregate statistics from a list of LLMCallRecord instances.
+
+    Args:
+        call_records: List of LLMCallRecord instances
+
+    Returns:
+        Dictionary containing aggregated statistics
+    """
+    aggregates = {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "total_tokens": 0,
+        "reasoning_tokens": 0,
+        "cost_usd": 0.0,
+        "latency_ms": 0,
+        "models_used": set(),
+        "providers_used": set(),
+        "tool_calls_count": 0,
+        "error_count": 0,
+        "success_count": 0,
+        "call_count": len(call_records)
+    }
+
+    for record in call_records:
+        # Token aggregation
+        if record.usage:
+            if record.usage.input_tokens:
+                aggregates["input_tokens"] += record.usage.input_tokens
+            if record.usage.output_tokens:
+                aggregates["output_tokens"] += record.usage.output_tokens
+            if record.usage.total_tokens:
+                aggregates["total_tokens"] += record.usage.total_tokens
+            if record.usage.reasoning_tokens:
+                aggregates["reasoning_tokens"] += record.usage.reasoning_tokens
+            if record.usage.cost_usd:
+                aggregates["cost_usd"] += record.usage.cost_usd
+
+        # Latency aggregation
+        if record.latency_ms:
+            aggregates["latency_ms"] += record.latency_ms
+
+        # Model and provider tracking
+        if record.model_name:
+            aggregates["models_used"].add(record.model_name)
+        if record.provider:
+            aggregates["providers_used"].add(record.provider)
+
+        # Tool calls
+        aggregates["tool_calls_count"] += len(record.output_tool_calls)
+
+        # Success/error tracking
+        if record.outcome == "error":
+            aggregates["error_count"] += 1
+        elif record.outcome == "success":
+            aggregates["success_count"] += 1
+
+    # Convert sets to lists for JSON serialization
+    aggregates["models_used"] = list(aggregates["models_used"])
+    aggregates["providers_used"] = list(aggregates["providers_used"])
+
+    # Compute averages
+    if aggregates["call_count"] > 0:
+        aggregates["avg_latency_ms"] = aggregates["latency_ms"] / aggregates["call_count"]
+        aggregates["avg_input_tokens"] = aggregates["input_tokens"] / aggregates["call_count"]
+        aggregates["avg_output_tokens"] = aggregates["output_tokens"] / aggregates["call_count"]
+
+    return aggregates
+
+
+def create_llm_call_record_from_streaming(
+    chunks: List[LLMChunk],
+    model_name: str,
+    provider: str,
+    messages: List[Dict[str, Any]],
+    temperature: float = 0.8,
+    request_params: Optional[Dict[str, Any]] = None,
+    started_at: Optional[datetime] = None,
+    completed_at: Optional[datetime] = None,
+) -> LLMCallRecord:
+    """Create an LLMCallRecord from streaming chunks.
+
+    This function reconstructs a complete LLMCallRecord from streaming
+    response chunks, useful for Responses API or streaming Chat Completions.
+
+    Args:
+        chunks: List of LLMChunk instances from streaming
+        model_name: Name of the model used
+        provider: Provider name
+        messages: Input messages sent to the model
+        temperature: Temperature parameter used
+        request_params: Additional request parameters
+        started_at: When the request started
+        completed_at: When the request completed
+
+    Returns:
+        A populated LLMCallRecord instance
+    """
+    # Reconstruct output text from chunks
+    output_text = "".join(
+        chunk.delta_text for chunk in chunks
+        if chunk.delta_text
+    )
+
+    # Calculate latency from chunk timestamps
+    latency_ms = None
+    if chunks and started_at:
+        last_chunk_time = chunks[-1].received_at
+        latency_ms = int((last_chunk_time - started_at).total_seconds() * 1000)
+
+    # Convert input messages
+    input_messages = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+
+        if isinstance(content, str):
+            parts = [LLMContentPart(type="text", text=content)]
+        else:
+            parts = [LLMContentPart(type="text", text=str(content))]
+
+        input_messages.append(LLMMessage(role=role, parts=parts))
+
+    # Create output message
+    output_messages = [
+        LLMMessage(
+            role="assistant",
+            parts=[LLMContentPart(type="text", text=output_text)]
+        )
+    ]
+
+    # Build request parameters
+    params = LLMRequestParams(
+        temperature=temperature,
+        raw_params=request_params or {}
+    )
+
+    # Create the call record
+    record = LLMCallRecord(
+        call_id=str(uuid.uuid4()),
+        api_type="responses",  # Streaming typically from Responses API
+        provider=provider,
+        model_name=model_name,
+        started_at=started_at or datetime.utcnow(),
+        completed_at=completed_at or datetime.utcnow(),
+        latency_ms=latency_ms,
+        request_params=params,
+        input_messages=input_messages,
+        output_messages=output_messages,
+        output_text=output_text,
+        chunks=chunks,
+        outcome="success",
+        metadata={
+            "chunk_count": len(chunks),
+            "streaming": True
+        }
+    )
+
+    return record
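
Taken together, the helpers above convert a single vendor response into a normalized call record and then roll any number of records up into summary statistics. A minimal usage sketch follows; the FakeResponse stand-in, its attribute values, and the model name are illustrative only, while the helper names and signatures come from the new module itself.

    from datetime import datetime

    from synth_ai.tracing_v3.llm_call_record_helpers import (
        create_llm_call_record_from_response,
        compute_aggregates_from_call_records,
    )


    class FakeResponse:
        # Hypothetical stand-in; real callers pass a BaseLMResponse produced by the LM vendor clients.
        raw_response = "Hello!"
        tool_calls = []
        usage = {"input_tokens": 12, "output_tokens": 3, "total_tokens": 15}


    record = create_llm_call_record_from_response(
        response=FakeResponse(),
        model_name="gpt-4o-mini",  # illustrative model name
        provider="openai",
        messages=[{"role": "user", "content": "Say hello"}],
        started_at=datetime.utcnow(),
        completed_at=datetime.utcnow(),
    )

    aggregates = compute_aggregates_from_call_records([record])
    print(aggregates["total_tokens"], aggregates["models_used"])  # 15 ['gpt-4o-mini']
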
synth_ai/tracing_v3/lm_call_record_abstractions.py
@@ -0,0 +1,257 @@
+"""Unified abstractions for recording LLM API calls (inputs and results).
+
+These records normalize different provider API shapes (Chat Completions,
+Completions, Responses) into a single schema suitable for storage and
+analysis, and are intended to be attached to LMCAISEvent(s) as a list of
+call records.
+
+Integration proposal:
+- Update LMCAISEvent to store `call_records: list[LLMCallRecord]` and remove
+  per-call fields like `model_name`, `provider`, and token counts from the
+  event itself. Those belong on each LLMCallRecord. Aggregates (e.g.,
+  total_tokens across records, cost_usd) can remain on LMCAISEvent and be
+  derived from the records.
+
+Design goals:
+- Capture both input and output payloads in a provider-agnostic way.
+- Preserve provider-specific request params for auditability.
+- Represent tool calls (requested by the model) and tool results distinctly.
+- Support streaming (optionally via `chunks`), but emphasize a final collapsed
+  `LLMCallRecord` for most analytics and fine-tuning data extraction.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Optional, Literal
+from datetime import datetime
+
+
+@dataclass
+class LLMUsage:
+    """Token usage reported by the provider.
+
+    All fields are optional because some providers or stages may omit them.
+    """
+
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    total_tokens: int | None = None
+    # Reasoning/chain-of-thought style token accounting (if provider exposes it)
+    reasoning_tokens: int | None = None
+    reasoning_input_tokens: int | None = None
+    reasoning_output_tokens: int | None = None
+    # Caching/billing/cost
+    cache_write_tokens: int | None = None
+    cache_read_tokens: int | None = None
+    billable_input_tokens: int | None = None
+    billable_output_tokens: int | None = None
+    cost_usd: float | None = None
+
+
+@dataclass
+class LLMRequestParams:
+    """Provider request parameters.
+
+    Store provider-agnostic params explicitly and keep a `raw_params` map for
+    anything provider-specific (top_k, frequency_penalty, etc.).
+    """
+
+    temperature: float | None = None
+    top_p: float | None = None
+    max_tokens: int | None = None
+    stop: list[str] | None = None
+    # Common non-agnostic knobs
+    top_k: int | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
+    repetition_penalty: float | None = None
+    seed: int | None = None
+    n: int | None = None
+    best_of: int | None = None
+    response_format: dict[str, Any] | None = None
+    json_mode: bool | None = None
+    tool_config: dict[str, Any] | None = None
+    raw_params: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMContentPart:
+    """A content item within a message (text, tool-structured JSON, image, etc.)."""
+
+    type: str
+    text: str | None = None
+    # For Responses API or multimodal payloads, keep a generic value
+    data: dict[str, Any] | None = None
+    # Blob reference fields (for image/audio/video)
+    mime_type: str | None = None
+    uri: str | None = None
+    base64_data: str | None = None
+    size_bytes: int | None = None
+    sha256: str | None = None
+    width: int | None = None
+    height: int | None = None
+    duration_ms: int | None = None
+    sample_rate: int | None = None
+    channels: int | None = None
+    language: str | None = None
+
+
+@dataclass
+class LLMMessage:
+    """A message in a chat-style exchange.
+
+    For Completions-style calls, `role="user"` with one text part is typical for input,
+    and `role="assistant"` for output. Responses API can emit multiple parts;
+    use `parts` for generality.
+    """
+
+    role: str  # e.g., system, user, assistant, tool, function, developer
+    parts: list[LLMContentPart] = field(default_factory=list)
+    name: str | None = None
+    tool_call_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ToolCallSpec:
+    """A tool/function call requested by the model (not yet executed)."""
+
+    name: str
+    arguments_json: str  # serialized JSON payload provided by the model
+    arguments: dict[str, Any] | None = None  # parsed convenience
+    call_id: str | None = None  # provider-assigned or synthesized
+    index: int | None = None  # ordinal within a batch
+    parent_call_id: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ToolCallResult:
+    """The result of executing a tool/function call outside the model.
+
+    This is distinct from the model's own output. Attach execution details for
+    auditability.
+    """
+
+    call_id: str | None = None  # correlate to ToolCallSpec
+    output_text: str | None = None
+    exit_code: int | None = None
+    status: Literal["ok", "error"] | None = None
+    error_message: str | None = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+    duration_ms: int | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMChunk:
+    """Optional streaming chunk representation (for Responses/Chat streaming)."""
+
+    sequence_index: int
+    received_at: datetime
+    event_type: str | None = None  # e.g., content.delta, tool.delta, message.stop
+    choice_index: int | None = None
+    raw_json: str | None = None
+    delta_text: str | None = None
+    delta: dict[str, Any] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class LLMCallRecord:
+    """Normalized record of a single LLM API call.
+
+    Fields capture both the request (input) and the response (output), with
+    optional tool calls and results as emitted by/through the agent runtime.
+    """
+
+    # Identity and classification
+    call_id: str
+    api_type: str  # e.g., "chat_completions", "completions", "responses"
+    provider: str | None = None  # e.g., "openai", "anthropic"
+    model_name: str = ""
+    schema_version: str = "1.0"
+
+    # Timing
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+    latency_ms: int | None = None  # convenience cache (completed - started)
+
+    # Request
+    request_params: LLMRequestParams = field(default_factory=LLMRequestParams)
+    input_messages: list[LLMMessage] = field(default_factory=list)
+    input_text: str | None = None  # for completions-style prompts
+    tool_choice: str | None = None  # e.g., "auto", "none", or a specific tool
+
+    # Response
+    output_messages: list[LLMMessage] = field(default_factory=list)
+    outputs: list[LLMMessage] = field(default_factory=list)  # for n>1 choices
+    output_text: str | None = None  # for completions-style outputs
+    output_tool_calls: list[ToolCallSpec] = field(default_factory=list)
+    usage: LLMUsage | None = None
+    finish_reason: str | None = None
+    choice_index: int | None = None
+
+    # Tool execution results (post-model, optional)
+    tool_results: list[ToolCallResult] = field(default_factory=list)
+
+    # Streaming (optional)
+    chunks: list[LLMChunk] | None = None
+
+    # Raw payloads for audit/debugging
+    request_raw_json: str | None = None
+    response_raw_json: str | None = None
+
+    # Provider- or call-specific extra data (tracing ids, etc.)
+    span_id: str | None = None
+    trace_id: str | None = None
+    provider_request_id: str | None = None
+    request_server_timing: dict[str, Any] | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    # Error/outcome
+    outcome: Literal["success", "error", "timeout", "cancelled"] | None = None
+    error: dict[str, Any] | None = None  # {code, message, type, raw}
+    # Logprob traces (optional)
+    token_traces: list[dict[str, Any]] | None = None
+    # Safety/refusal (optional)
+    safety: dict[str, Any] | None = None
+    refusal: dict[str, Any] | None = None
+    # Privacy/redactions
+    redactions: list[dict[str, Any]] | None = None
+
+
+def compute_latency_ms(record: LLMCallRecord) -> Optional[int]:
+    """Compute and update latency_ms from timestamps if available."""
+    if record.started_at and record.completed_at:
+        delta = int((record.completed_at - record.started_at).total_seconds() * 1000)
+        record.latency_ms = delta
+        return delta
+    return record.latency_ms
+
+
+# Provider mapping guidance (summary)
+# -----------------------------------
+# - OpenAI Chat Completions:
+#   - api_type = "chat_completions"
+#   - input_messages from `messages`, output_messages from `choices[].message`
+#   - usage from response.usage
+#   - tool_calls map to ToolCallSpec (choices[].message.tool_calls)
+#
+# - OpenAI Completions:
+#   - api_type = "completions"
+#   - input_text from `prompt`, output_text from `choices[].text`
+#   - usage from response.usage
+#
+# - OpenAI Responses API (streamed):
+#   - api_type = "responses"
+#   - input_messages from `input[]` or `messages[]` per content type
+#   - output_messages from streamed `message` nodes; usage from terminal chunk
+#   - chunks hold raw SSE segments if desired
+#   - tool_calls from streamed `function_call`/`tool_call` nodes
+#
+# Tool execution results should be attached as ToolCallResult entries when the
+# agent runtime executes the requested tool(s) and has ground-truth outputs.
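
Because every class in this module is a plain dataclass, a record can also be constructed directly when no vendor response object is involved (e.g. for backfilling traces). A small sketch, with illustrative identifiers and token counts:

    from datetime import datetime, timedelta

    from synth_ai.tracing_v3.lm_call_record_abstractions import (
        LLMCallRecord,
        LLMMessage,
        LLMContentPart,
        LLMUsage,
        compute_latency_ms,
    )

    started = datetime.utcnow()
    record = LLMCallRecord(
        call_id="call-123",  # illustrative id
        api_type="chat_completions",
        provider="openai",
        model_name="gpt-4o-mini",
        started_at=started,
        completed_at=started + timedelta(milliseconds=850),
        input_messages=[LLMMessage(role="user", parts=[LLMContentPart(type="text", text="Hi")])],
        output_messages=[LLMMessage(role="assistant", parts=[LLMContentPart(type="text", text="Hello!")])],
        usage=LLMUsage(input_tokens=5, output_tokens=2, total_tokens=7),
        outcome="success",
    )

    print(compute_latency_ms(record))  # 850; also cached on record.latency_ms
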
synth_ai/tracing_v3/session_tracer.py
@@ -5,7 +5,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Any, Union
 from contextlib import asynccontextmanager
 
-from .abstractions import SessionTrace, SessionTimeStep, BaseEvent, SessionEventMessage, TimeRecord
+from .abstractions import SessionTrace, SessionTimeStep, BaseEvent, SessionEventMarkovBlanketMessage, TimeRecord
 from .decorators import set_session_id, set_turn_number, set_session_tracer, SessionContext
 from .turso.manager import AsyncSQLTraceManager
 from .config import CONFIG
@@ -93,7 +93,7 @@ class SessionTracer:
             created_at=datetime.utcnow(),
             session_time_steps=[],
             event_history=[],
-            message_history=[],
+            markov_blanket_message_history=[],
             metadata=metadata or {},
         )
 
@@ -215,7 +215,7 @@
         if self._current_trace is None:
             raise RuntimeError("No active session")
 
-        msg = SessionEventMessage(
+        msg = SessionEventMarkovBlanketMessage(
            content=content,
            message_type=message_type,
            time_record=TimeRecord(
@@ -232,9 +232,9 @@
        await self.hooks.trigger("message_recorded", message=msg)
 
        # Add to histories
-       self._current_trace.message_history.append(msg)
+       self._current_trace.markov_blanket_message_history.append(msg)
        if self._current_step:
-           self._current_step.step_messages.append(msg)
+           self._current_step.markov_blanket_messages.append(msg)
 
    async def end_session(self, save: bool = None) -> SessionTrace:
        """End the current session.
@@ -20,7 +20,7 @@ from typing import List
 from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
 from synth_ai.tracing_v3.session_tracer import SessionTracer
 from synth_ai.tracing_v3.abstractions import (
-    SessionEventMessage,
+    SessionEventMarkovBlanketMessage,
     TimeRecord,
     RuntimeEvent,
     EnvironmentEvent,
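
The session_tracer.py and test hunks above all stem from the same rename in the tracing_v3 abstractions: SessionEventMessage becomes SessionEventMarkovBlanketMessage, and the trace fields move from message_history / step_messages to markov_blanket_message_history / markov_blanket_messages. Downstream code reading those attributes would be updated roughly as follows (a sketch; trace and step stand for a SessionTrace and a SessionTimeStep instance):

    # Before (0.2.2.dev0)
    from synth_ai.tracing_v3.abstractions import SessionEventMessage
    messages = trace.message_history
    step_msgs = step.step_messages

    # After (0.2.4.dev2)
    from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage
    messages = trace.markov_blanket_message_history
    step_msgs = step.markov_blanket_messages
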