synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. synth_ai/cli/__init__.py +66 -0
  2. synth_ai/cli/balance.py +205 -0
  3. synth_ai/cli/calc.py +70 -0
  4. synth_ai/cli/demo.py +74 -0
  5. synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
  6. synth_ai/cli/man.py +103 -0
  7. synth_ai/cli/recent.py +126 -0
  8. synth_ai/cli/root.py +184 -0
  9. synth_ai/cli/status.py +126 -0
  10. synth_ai/cli/traces.py +136 -0
  11. synth_ai/cli/watch.py +508 -0
  12. synth_ai/config/base_url.py +53 -0
  13. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  14. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  15. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
  16. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
  25. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
  26. synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
  27. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
  28. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  29. synth_ai/environments/examples/crafter_custom/environment.py +1 -1
  30. synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
  31. synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
  32. synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
  33. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
  34. synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
  35. synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
  36. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
  37. synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
  38. synth_ai/environments/service/core_routes.py +1 -1
  39. synth_ai/experimental/synth_oss.py +446 -0
  40. synth_ai/learning/core.py +21 -0
  41. synth_ai/learning/gateway.py +4 -0
  42. synth_ai/learning/prompts/gepa.py +0 -0
  43. synth_ai/learning/prompts/mipro.py +8 -0
  44. synth_ai/lm/__init__.py +3 -0
  45. synth_ai/lm/core/main.py +4 -0
  46. synth_ai/lm/core/main_v3.py +238 -122
  47. synth_ai/lm/core/vendor_clients.py +4 -0
  48. synth_ai/lm/provider_support/openai.py +11 -2
  49. synth_ai/lm/vendors/base.py +7 -0
  50. synth_ai/lm/vendors/openai_standard.py +339 -4
  51. synth_ai/lm/vendors/openai_standard_responses.py +243 -0
  52. synth_ai/lm/vendors/synth_client.py +155 -5
  53. synth_ai/lm/warmup.py +54 -17
  54. synth_ai/tracing/__init__.py +18 -0
  55. synth_ai/tracing_v1/__init__.py +29 -14
  56. synth_ai/tracing_v3/__init__.py +2 -2
  57. synth_ai/tracing_v3/abstractions.py +62 -17
  58. synth_ai/tracing_v3/config.py +13 -7
  59. synth_ai/tracing_v3/db_config.py +6 -6
  60. synth_ai/tracing_v3/hooks.py +1 -1
  61. synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
  62. synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
  63. synth_ai/tracing_v3/session_tracer.py +5 -5
  64. synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
  65. synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
  66. synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
  67. synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
  68. synth_ai/tracing_v3/turso/manager.py +18 -11
  69. synth_ai/tracing_v3/turso/models.py +1 -0
  70. synth_ai/tui/__main__.py +13 -0
  71. synth_ai/tui/dashboard.py +329 -0
  72. synth_ai/v0/tracing/__init__.py +0 -0
  73. synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
  74. synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
  75. synth_ai/{tracing → v0/tracing}/context.py +1 -1
  76. synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
  77. synth_ai/v0/tracing/events/__init__.py +0 -0
  78. synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
  79. synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
  80. synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
  81. synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
  82. synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
  83. synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
  84. synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
  85. synth_ai/{tracing → v0/tracing}/upload.py +4 -4
  86. synth_ai/v0/tracing_v1/__init__.py +16 -0
  87. synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
  88. synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
  89. synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
  90. synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
  91. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  92. synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
  93. synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
  94. synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
  95. synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
  96. synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
  97. synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
  98. synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
  99. synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
  100. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
  101. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
  102. /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
  103. /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
  104. /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
  105. /synth_ai/{tracing → v0/tracing}/config.py +0 -0
  106. /synth_ai/{tracing → v0/tracing}/local.py +0 -0
  107. /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
  108. /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
  109. /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
  110. /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
  111. /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
  112. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
  113. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  114. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  115. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
synth_ai/tracing_v3/tests/test_llm_call_records.py (new file)
@@ -0,0 +1,672 @@
"""Unit tests for LMCAISEvent with LLMCallRecord integration.

This module tests the new call_records field in LMCAISEvent and demonstrates
proper usage patterns for migrating from legacy fields to the new structure.
"""

import json
import time
import uuid
from datetime import datetime
from typing import List, Dict, Any

import pytest

from synth_ai.tracing_v3.abstractions import (
    LMCAISEvent,
    TimeRecord,
    SessionTimeStep,
    SessionTrace,
)
from synth_ai.tracing_v3.lm_call_record_abstractions import (
    LLMCallRecord,
    LLMUsage,
    LLMRequestParams,
    LLMMessage,
    LLMContentPart,
    ToolCallSpec,
    ToolCallResult,
    compute_latency_ms,
)

class TestLLMCallRecord:
    """Test LLMCallRecord creation and manipulation."""

    def test_create_basic_call_record(self):
        """Test creating a basic LLMCallRecord."""
        call_id = str(uuid.uuid4())
        started_at = datetime.utcnow()

        record = LLMCallRecord(
            call_id=call_id,
            api_type="chat_completions",
            provider="openai",
            model_name="gpt-4",
            started_at=started_at,
            input_messages=[
                LLMMessage(
                    role="user",
                    parts=[LLMContentPart(type="text", text="What is 2+2?")]
                )
            ],
            output_messages=[
                LLMMessage(
                    role="assistant",
                    parts=[LLMContentPart(type="text", text="4")]
                )
            ],
            usage=LLMUsage(
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
                cost_usd=0.0003
            ),
            finish_reason="stop"
        )

        assert record.call_id == call_id
        assert record.api_type == "chat_completions"
        assert record.model_name == "gpt-4"
        assert record.usage.total_tokens == 15
        assert len(record.input_messages) == 1
        assert len(record.output_messages) == 1

    def test_compute_latency(self):
        """Test latency computation from timestamps."""
        started_at = datetime.utcnow()
        completed_at = datetime.utcnow()

        record = LLMCallRecord(
            call_id=str(uuid.uuid4()),
            api_type="chat_completions",
            started_at=started_at,
            completed_at=completed_at
        )

        latency = compute_latency_ms(record)
        assert latency is not None
        assert latency >= 0
        assert record.latency_ms == latency

    def test_tool_call_record(self):
        """Test LLMCallRecord with tool calls."""
        record = LLMCallRecord(
            call_id=str(uuid.uuid4()),
            api_type="chat_completions",
            provider="openai",
            model_name="gpt-4",
            output_tool_calls=[
                ToolCallSpec(
                    name="get_weather",
                    arguments_json='{"location": "San Francisco"}',
                    arguments={"location": "San Francisco"},
                    call_id="tool_1"
                )
            ],
            tool_results=[
                ToolCallResult(
                    call_id="tool_1",
                    output_text="72°F, sunny",
                    status="ok",
                    duration_ms=150
                )
            ]
        )

        assert len(record.output_tool_calls) == 1
        assert record.output_tool_calls[0].name == "get_weather"
        assert len(record.tool_results) == 1
        assert record.tool_results[0].status == "ok"
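
# Hedged sketch (assumed library behavior): compute_latency_ms, exercised in
# test_compute_latency above, presumably derives latency_ms from the two
# timestamps and, as the test asserts, caches it on the record. The helper
# below is illustrative only and not part of the package:
def _example_latency_ms(started_at: datetime, completed_at: datetime) -> int:
    """Millisecond latency between two timestamps, as test_compute_latency expects."""
    return int((completed_at - started_at).total_seconds() * 1000)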

class TestLMCAISEventWithCallRecords:
    """Test LMCAISEvent with integrated LLMCallRecord."""

    def test_create_event_with_call_records(self):
        """Test creating an LMCAISEvent with call_records."""
        call_record = LLMCallRecord(
            call_id=str(uuid.uuid4()),
            api_type="chat_completions",
            provider="openai",
            model_name="gpt-4",
            usage=LLMUsage(
                input_tokens=100,
                output_tokens=50,
                total_tokens=150,
                cost_usd=0.003
            ),
            latency_ms=500
        )

        event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            call_records=[call_record]
        )

        assert len(event.call_records) == 1
        assert event.call_records[0].model_name == "gpt-4"
        assert event.call_records[0].usage.total_tokens == 150

    def test_aggregate_from_call_records(self):
        """Test computing aggregates from multiple call_records."""
        call_records = [
            LLMCallRecord(
                call_id=str(uuid.uuid4()),
                api_type="chat_completions",
                model_name="gpt-4",
                usage=LLMUsage(
                    input_tokens=100,
                    output_tokens=50,
                    total_tokens=150,
                    cost_usd=0.003
                ),
                latency_ms=500
            ),
            LLMCallRecord(
                call_id=str(uuid.uuid4()),
                api_type="chat_completions",
                model_name="gpt-4",
                usage=LLMUsage(
                    input_tokens=200,
                    output_tokens=100,
                    total_tokens=300,
                    cost_usd=0.006
                ),
                latency_ms=700
            )
        ]

        # Create event with call_records
        event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            call_records=call_records
        )

        # Compute aggregates from call_records
        total_input_tokens = sum(
            r.usage.input_tokens for r in call_records
            if r.usage and r.usage.input_tokens
        )
        total_output_tokens = sum(
            r.usage.output_tokens for r in call_records
            if r.usage and r.usage.output_tokens
        )
        total_tokens = sum(
            r.usage.total_tokens for r in call_records
            if r.usage and r.usage.total_tokens
        )
        total_cost = sum(
            r.usage.cost_usd for r in call_records
            if r.usage and r.usage.cost_usd
        )
        total_latency = sum(
            r.latency_ms for r in call_records
            if r.latency_ms
        )

        # Set aggregates on event
        event.input_tokens = total_input_tokens
        event.output_tokens = total_output_tokens
        event.total_tokens = total_tokens
        event.cost_usd = total_cost
        event.latency_ms = total_latency

        assert event.input_tokens == 300
        assert event.output_tokens == 150
        assert event.total_tokens == 450
        assert abs(event.cost_usd - 0.009) < 0.0001  # Use floating point comparison
        assert event.latency_ms == 1200

    def test_migration_pattern(self):
        """Test migration from legacy fields to call_records."""
        # Legacy pattern (what we're migrating from)
        legacy_event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            model_name="gpt-4",
            provider="openai",
            input_tokens=100,
            output_tokens=50,
            total_tokens=150,
            cost_usd=0.003,
            latency_ms=500
        )

        # New pattern (what we're migrating to)
        new_event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            # Aggregates can stay on the event
            total_tokens=150,
            cost_usd=0.003,
            latency_ms=500,
            # Details go in call_records
            call_records=[
                LLMCallRecord(
                    call_id=str(uuid.uuid4()),
                    api_type="chat_completions",
                    provider="openai",
                    model_name="gpt-4",
                    usage=LLMUsage(
                        input_tokens=100,
                        output_tokens=50,
                        total_tokens=150,
                        cost_usd=0.003
                    ),
                    latency_ms=500
                )
            ]
        )

        # Both should represent the same information
        assert legacy_event.total_tokens == new_event.total_tokens
        assert legacy_event.cost_usd == new_event.cost_usd
        assert legacy_event.model_name == new_event.call_records[0].model_name
        assert legacy_event.provider == new_event.call_records[0].provider
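
# The invariant demonstrated by test_migration_pattern: event-level aggregates
# (total_tokens, cost_usd, latency_ms) should equal the sums over call_records,
# so readers can rely on either view during the transition. A hedged consistency
# check (illustrative helper, not part of the package):
def _example_event_is_consistent(event: LMCAISEvent) -> bool:
    """True if the event's token aggregate matches the sum over its call_records."""
    recorded = sum(
        r.usage.total_tokens for r in (event.call_records or [])
        if r.usage and r.usage.total_tokens
    )
    return event.total_tokens is None or event.total_tokens == recorded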

class TestComplexScenarios:
    """Test complex scenarios with multiple calls and tool usage."""

    def test_multi_turn_conversation(self):
        """Test a multi-turn conversation with multiple LLM calls."""
        session = SessionTrace(
            session_id=str(uuid.uuid4()),
            created_at=datetime.utcnow()
        )

        # Turn 1: Initial question
        turn1 = SessionTimeStep(
            step_id="turn_1",
            step_index=0,
            turn_number=1
        )

        turn1_event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            call_records=[
                LLMCallRecord(
                    call_id=str(uuid.uuid4()),
                    api_type="chat_completions",
                    model_name="gpt-4",
                    input_messages=[
                        LLMMessage(
                            role="user",
                            parts=[LLMContentPart(type="text", text="What's the weather?")]
                        )
                    ],
                    output_messages=[
                        LLMMessage(
                            role="assistant",
                            parts=[LLMContentPart(
                                type="text",
                                text="I'll check the weather for you."
                            )]
                        )
                    ],
                    output_tool_calls=[
                        ToolCallSpec(
                            name="get_weather",
                            arguments_json='{"location": "current"}',
                            call_id="weather_1"
                        )
                    ],
                    usage=LLMUsage(input_tokens=10, output_tokens=20, total_tokens=30)
                )
            ]
        )

        turn1.events.append(turn1_event)
        session.session_time_steps.append(turn1)

        # Turn 2: Tool result and response
        turn2 = SessionTimeStep(
            step_id="turn_2",
            step_index=1,
            turn_number=2
        )

        turn2_event = LMCAISEvent(
            system_instance_id="llm_system",
            time_record=TimeRecord(event_time=time.time()),
            call_records=[
                LLMCallRecord(
                    call_id=str(uuid.uuid4()),
                    api_type="chat_completions",
                    model_name="gpt-4",
                    input_messages=[
                        LLMMessage(
                            role="tool",
                            tool_call_id="weather_1",
                            parts=[LLMContentPart(
                                type="text",
                                text="San Francisco: 72°F, sunny"
                            )]
                        )
                    ],
                    output_messages=[
                        LLMMessage(
                            role="assistant",
                            parts=[LLMContentPart(
                                type="text",
                                text="The weather in San Francisco is 72°F and sunny."
                            )]
                        )
                    ],
                    usage=LLMUsage(input_tokens=15, output_tokens=25, total_tokens=40),
                    tool_results=[
                        ToolCallResult(
                            call_id="weather_1",
                            output_text="San Francisco: 72°F, sunny",
                            status="ok"
                        )
                    ]
                )
            ]
        )

        turn2.events.append(turn2_event)
        session.session_time_steps.append(turn2)

        # Verify session structure
        assert len(session.session_time_steps) == 2
        assert len(session.session_time_steps[0].events) == 1
        assert len(session.session_time_steps[1].events) == 1

        # Verify tool call flow
        turn1_call = session.session_time_steps[0].events[0].call_records[0]
        turn2_call = session.session_time_steps[1].events[0].call_records[0]

        assert len(turn1_call.output_tool_calls) == 1
        assert turn1_call.output_tool_calls[0].name == "get_weather"
        assert len(turn2_call.tool_results) == 1
        assert turn2_call.tool_results[0].call_id == "weather_1"

    def test_streaming_response(self):
        """Test LLMCallRecord with streaming chunks."""
        from synth_ai.tracing_v3.lm_call_record_abstractions import LLMChunk

        chunks = [
            LLMChunk(
                sequence_index=0,
                received_at=datetime.utcnow(),
                event_type="content.delta",
                delta_text="The",
                choice_index=0
            ),
            LLMChunk(
                sequence_index=1,
                received_at=datetime.utcnow(),
                event_type="content.delta",
                delta_text=" answer",
                choice_index=0
            ),
            LLMChunk(
                sequence_index=2,
                received_at=datetime.utcnow(),
                event_type="content.delta",
                delta_text=" is",
                choice_index=0
            ),
            LLMChunk(
                sequence_index=3,
                received_at=datetime.utcnow(),
                event_type="content.delta",
                delta_text=" 42",
                choice_index=0
            ),
            LLMChunk(
                sequence_index=4,
                received_at=datetime.utcnow(),
                event_type="message.stop",
                choice_index=0
            )
        ]

        record = LLMCallRecord(
            call_id=str(uuid.uuid4()),
            api_type="responses",  # OpenAI Responses API style
            model_name="gpt-4",
            chunks=chunks,
            output_text="The answer is 42",  # Final collapsed output
            usage=LLMUsage(input_tokens=10, output_tokens=5, total_tokens=15)
        )

        assert len(record.chunks) == 5
        assert record.output_text == "The answer is 42"

        # Reconstruct from chunks
        reconstructed = "".join(
            c.delta_text for c in chunks
            if c.delta_text
        )
        assert reconstructed == "The answer is 42"
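
# Note on the reconstruction above: joining on delta_text alone works because the
# terminal "message.stop" chunk carries no delta_text. A stricter variant (an
# editorial suggestion, not library behavior) would filter on event_type as well:
#
#     "".join(c.delta_text or "" for c in chunks if c.event_type == "content.delta")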

class TestProviderMappings:
    """Test mapping different provider formats to LLMCallRecord."""

    def test_openai_chat_completions_mapping(self):
        """Test mapping OpenAI Chat Completions to LLMCallRecord."""
        # Simulate OpenAI response structure
        openai_response = {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "model": "gpt-4",
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I help you today?"
                },
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 10,
                "completion_tokens": 9,
                "total_tokens": 19
            }
        }

        # Map to LLMCallRecord
        record = LLMCallRecord(
            call_id=openai_response["id"],
            api_type="chat_completions",
            provider="openai",
            model_name=openai_response["model"],
            output_messages=[
                LLMMessage(
                    role=openai_response["choices"][0]["message"]["role"],
                    parts=[LLMContentPart(
                        type="text",
                        text=openai_response["choices"][0]["message"]["content"]
                    )]
                )
            ],
            usage=LLMUsage(
                input_tokens=openai_response["usage"]["prompt_tokens"],
                output_tokens=openai_response["usage"]["completion_tokens"],
                total_tokens=openai_response["usage"]["total_tokens"]
            ),
            finish_reason=openai_response["choices"][0]["finish_reason"],
            provider_request_id=openai_response["id"]
        )

        assert record.call_id == "chatcmpl-123"
        assert record.model_name == "gpt-4"
        assert record.usage.total_tokens == 19
        assert record.finish_reason == "stop"

    def test_anthropic_messages_mapping(self):
        """Test mapping Anthropic Messages API to LLMCallRecord."""
        # Simulate Anthropic response structure
        anthropic_response = {
            "id": "msg_123",
            "type": "message",
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": "I'll help you with that."
                }
            ],
            "model": "claude-3-opus-20240229",
            "stop_reason": "end_turn",
            "usage": {
                "input_tokens": 15,
                "output_tokens": 12
            }
        }

        # Map to LLMCallRecord
        record = LLMCallRecord(
            call_id=anthropic_response["id"],
            api_type="messages",  # Anthropic Messages API
            provider="anthropic",
            model_name=anthropic_response["model"],
            output_messages=[
                LLMMessage(
                    role=anthropic_response["role"],
                    parts=[
                        LLMContentPart(
                            type=content["type"],
                            text=content["text"]
                        )
                        for content in anthropic_response["content"]
                    ]
                )
            ],
            usage=LLMUsage(
                input_tokens=anthropic_response["usage"]["input_tokens"],
                output_tokens=anthropic_response["usage"]["output_tokens"],
                total_tokens=(
                    anthropic_response["usage"]["input_tokens"] +
                    anthropic_response["usage"]["output_tokens"]
                )
            ),
            finish_reason=anthropic_response["stop_reason"],
            provider_request_id=anthropic_response["id"]
        )

        assert record.call_id == "msg_123"
        assert record.model_name == "claude-3-opus-20240229"
        assert record.usage.total_tokens == 27
        assert record.finish_reason == "end_turn"
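
# The two mappings above normalize provider-specific usage vocabularies into the
# single LLMUsage shape: OpenAI reports prompt_tokens/completion_tokens and
# supplies the total itself, while Anthropic reports input_tokens/output_tokens
# and leaves the total to the caller (15 + 12 == 27, as asserted).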

def helper_compute_aggregates_from_records(call_records: List[LLMCallRecord]) -> Dict[str, Any]:
    """Helper function to compute aggregates from call_records.

    This demonstrates the pattern for computing event-level aggregates
    from a list of LLMCallRecord instances.
    """
    aggregates = {
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
        "cost_usd": 0.0,
        "latency_ms": 0,
        "models_used": set(),
        "providers_used": set(),
        "tool_calls_count": 0,
        "error_count": 0
    }

    for record in call_records:
        if record.usage:
            if record.usage.input_tokens:
                aggregates["input_tokens"] += record.usage.input_tokens
            if record.usage.output_tokens:
                aggregates["output_tokens"] += record.usage.output_tokens
            if record.usage.total_tokens:
                aggregates["total_tokens"] += record.usage.total_tokens
            if record.usage.cost_usd:
                aggregates["cost_usd"] += record.usage.cost_usd

        if record.latency_ms:
            aggregates["latency_ms"] += record.latency_ms

        if record.model_name:
            aggregates["models_used"].add(record.model_name)

        if record.provider:
            aggregates["providers_used"].add(record.provider)

        aggregates["tool_calls_count"] += len(record.output_tool_calls)

        if record.outcome == "error":
            aggregates["error_count"] += 1

    # Convert sets to lists for JSON serialization
    aggregates["models_used"] = list(aggregates["models_used"])
    aggregates["providers_used"] = list(aggregates["providers_used"])

    return aggregates
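
# Minimal usage sketch for the helper above (assumes an LMCAISEvent whose
# call_records are populated, as in test_aggregate_from_call_records):
#
#     aggs = helper_compute_aggregates_from_records(event.call_records)
#     event.input_tokens = aggs["input_tokens"]
#     event.output_tokens = aggs["output_tokens"]
#     event.total_tokens = aggs["total_tokens"]
#     event.cost_usd = aggs["cost_usd"]
#     event.latency_ms = aggs["latency_ms"]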

class TestAggregateHelper:
    """Test the aggregate computation helper."""

    def test_compute_aggregates(self):
        """Test computing aggregates from multiple call records."""
        records = [
            LLMCallRecord(
                call_id="1",
                api_type="chat_completions",
                model_name="gpt-4",
                provider="openai",
                usage=LLMUsage(
                    input_tokens=100,
                    output_tokens=50,
                    total_tokens=150,
                    cost_usd=0.003
                ),
                latency_ms=500,
                output_tool_calls=[
                    ToolCallSpec(name="tool1", arguments_json="{}")
                ]
            ),
            LLMCallRecord(
                call_id="2",
                api_type="messages",
                model_name="claude-3-opus",
                provider="anthropic",
                usage=LLMUsage(
                    input_tokens=200,
                    output_tokens=100,
                    total_tokens=300,
                    cost_usd=0.006
                ),
                latency_ms=700,
                outcome="success"
            ),
            LLMCallRecord(
                call_id="3",
                api_type="chat_completions",
                model_name="gpt-4",
                provider="openai",
                outcome="error",
                error={"code": "rate_limit", "message": "Rate limit exceeded"}
            )
        ]

        aggregates = helper_compute_aggregates_from_records(records)

        assert aggregates["input_tokens"] == 300
        assert aggregates["output_tokens"] == 150
        assert aggregates["total_tokens"] == 450
        assert abs(aggregates["cost_usd"] - 0.009) < 0.0001  # Floating point comparison
        assert aggregates["latency_ms"] == 1200
        assert set(aggregates["models_used"]) == {"gpt-4", "claude-3-opus"}
        assert set(aggregates["providers_used"]) == {"openai", "anthropic"}
        assert aggregates["tool_calls_count"] == 1
        assert aggregates["error_count"] == 1


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
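
The __main__ guard makes the module directly runnable; the equivalent invocation from a checkout is pytest synth_ai/tracing_v3/tests/test_llm_call_records.py -v.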