tracellm-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. tracellm_cli-0.1.0/PKG-INFO +30 -0
  2. tracellm_cli-0.1.0/README.md +14 -0
  3. tracellm_cli-0.1.0/app/__init__.py +1 -0
  4. tracellm_cli-0.1.0/app/database/__init__.py +1 -0
  5. tracellm_cli-0.1.0/app/database/mongodb.py +94 -0
  6. tracellm_cli-0.1.0/app/database/project_service.py +97 -0
  7. tracellm_cli-0.1.0/app/database/trace_service.py +417 -0
  8. tracellm_cli-0.1.0/app/main.py +44 -0
  9. tracellm_cli-0.1.0/app/models/__init__.py +14 -0
  10. tracellm_cli-0.1.0/app/models/health.py +5 -0
  11. tracellm_cli-0.1.0/app/models/project.py +32 -0
  12. tracellm_cli-0.1.0/app/models/trace.py +71 -0
  13. tracellm_cli-0.1.0/app/models/trace_model.py +62 -0
  14. tracellm_cli-0.1.0/app/routes/__init__.py +1 -0
  15. tracellm_cli-0.1.0/app/routes/health.py +10 -0
  16. tracellm_cli-0.1.0/app/routes/observability.py +60 -0
  17. tracellm_cli-0.1.0/app/routes/projects.py +25 -0
  18. tracellm_cli-0.1.0/app/websocket/__init__.py +1 -0
  19. tracellm_cli-0.1.0/app/websocket/socket.py +64 -0
  20. tracellm_cli-0.1.0/pyproject.toml +36 -0
  21. tracellm_cli-0.1.0/sdk/__init__.py +3 -0
  22. tracellm_cli-0.1.0/sdk/tracer.py +8 -0
  23. tracellm_cli-0.1.0/setup.cfg +4 -0
  24. tracellm_cli-0.1.0/tracellm/__init__.py +6 -0
  25. tracellm_cli-0.1.0/tracellm/banner.py +34 -0
  26. tracellm_cli-0.1.0/tracellm/cli.py +124 -0
  27. tracellm_cli-0.1.0/tracellm/db.py +75 -0
  28. tracellm_cli-0.1.0/tracellm/exporter.py +65 -0
  29. tracellm_cli-0.1.0/tracellm/integrations/__init__.py +4 -0
  30. tracellm_cli-0.1.0/tracellm/integrations/langchain.py +186 -0
  31. tracellm_cli-0.1.0/tracellm/integrations/openai.py +234 -0
  32. tracellm_cli-0.1.0/tracellm/integrations/tool_tracer.py +151 -0
  33. tracellm_cli-0.1.0/tracellm/mascot.py +49 -0
  34. tracellm_cli-0.1.0/tracellm/monitor.py +381 -0
  35. tracellm_cli-0.1.0/tracellm/palette.py +186 -0
  36. tracellm_cli-0.1.0/tracellm/replay.py +80 -0
  37. tracellm_cli-0.1.0/tracellm/startup.py +121 -0
  38. tracellm_cli-0.1.0/tracellm/summary.py +53 -0
  39. tracellm_cli-0.1.0/tracellm/trace_stream.py +68 -0
  40. tracellm_cli-0.1.0/tracellm/tracer.py +598 -0
  41. tracellm_cli-0.1.0/tracellm/tree_renderer.py +78 -0
  42. tracellm_cli-0.1.0/tracellm/utils.py +390 -0
  43. tracellm_cli-0.1.0/tracellm_cli.egg-info/PKG-INFO +30 -0
  44. tracellm_cli-0.1.0/tracellm_cli.egg-info/SOURCES.txt +46 -0
  45. tracellm_cli-0.1.0/tracellm_cli.egg-info/dependency_links.txt +1 -0
  46. tracellm_cli-0.1.0/tracellm_cli.egg-info/entry_points.txt +2 -0
  47. tracellm_cli-0.1.0/tracellm_cli.egg-info/requires.txt +9 -0
  48. tracellm_cli-0.1.0/tracellm_cli.egg-info/top_level.txt +3 -0
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: tracellm-cli
3
+ Version: 0.1.0
4
+ Summary: TraceLLM — Open-source LLM observability, tracing, and replay infrastructure.
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: fastapi
8
+ Requires-Dist: typer
9
+ Requires-Dist: rich
10
+ Requires-Dist: motor
11
+ Requires-Dist: pymongo
12
+ Requires-Dist: python-dotenv
13
+ Requires-Dist: openai
14
+ Requires-Dist: langchain-core
15
+ Requires-Dist: httpx
16
+
17
+ # TraceLLM Backend Package
18
+
19
+ Install in editable mode with:
20
+
21
+ ```bash
22
+ pip install -e .
23
+ ```
24
+
25
+ This package exposes:
26
+
27
+ - `from tracellm import trace`
28
+ - `tracellm demo`
29
+ - `tracellm trace`
30
+ - `tracellm replay`
@@ -0,0 +1,14 @@
1
+ # TraceLLM Backend Package
2
+
3
+ Install in editable mode with:
4
+
5
+ ```bash
6
+ pip install -e .
7
+ ```
8
+
9
+ This package exposes:
10
+
11
+ - `from tracellm import trace`
12
+ - `tracellm demo`
13
+ - `tracellm trace`
14
+ - `tracellm replay`
@@ -0,0 +1 @@
1
+ """TraceLLM backend application package."""
@@ -0,0 +1 @@
1
+ """Database helpers for TraceLLM."""
@@ -0,0 +1,94 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ from dotenv import load_dotenv
5
+ from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
6
+ from rich.console import Console
7
+
8
+ console = Console()
9
+
10
+ load_dotenv()
11
+
12
+ client: Optional[AsyncIOMotorClient] = None
13
+ database: Optional[AsyncIOMotorDatabase] = None
14
+ database_name: Optional[str] = None
15
+
16
+
17
async def connect_to_mongo(
    mongo_url: Optional[str] = None, db_name: Optional[str] = None
) -> AsyncIOMotorDatabase:
    """
    Create the MongoDB client if it is not already connected.
    This supports both FastAPI startup and direct SDK usage.

    Args:
        mongo_url: Connection string; falls back to the MONGO_URL environment variable.
        db_name: Database name; falls back to the DB_NAME environment variable.

    Returns:
        The database handle (the cached one when a connection already exists).

    Raises:
        ValueError: If the URL or the database name cannot be resolved.
        Exception: Whatever the driver raises while creating the client or pinging.
    """
    global client, database, database_name

    if database is not None:
        return database

    mongo_url = mongo_url or os.getenv("MONGO_URL")
    db_name = db_name or os.getenv("DB_NAME")

    if not mongo_url:
        raise ValueError("MONGO_URL is not set in the environment.")

    if not db_name:
        raise ValueError("DB_NAME is not set in the environment.")

    # Work on locals and publish the module globals only after the ping
    # succeeds, so get_database() never hands out an unverified connection.
    new_client: Optional[AsyncIOMotorClient] = None
    try:
        new_client = AsyncIOMotorClient(mongo_url)
        new_database = new_client[db_name]

        # Ping MongoDB once so we can fail early if the connection is invalid.
        await new_client.admin.command("ping")
    except Exception:
        # Release the half-open client instead of just dropping the reference.
        if new_client is not None:
            new_client.close()
        console.print("[bold red]MongoDB connection failed[/bold red]")
        raise

    if database is not None:
        # Another caller finished connecting while we awaited the ping;
        # keep its connection and discard ours.
        new_client.close()
        return database

    client = new_client
    database = new_database
    database_name = db_name

    console.print(
        f"[bold green]MongoDB connected[/bold green] [dim](database: {db_name})[/dim]"
    )
    return database
56
+
57
+
58
def get_database() -> AsyncIOMotorDatabase:
    """
    Hand back the module-level database handle.

    Raises:
        RuntimeError: If no connection has been established yet.
    """
    if database is not None:
        return database

    raise RuntimeError("MongoDB is not connected yet.")
67
+
68
+
69
async def get_database_connection() -> AsyncIOMotorDatabase:
    """
    Return the database handle, connecting first when none exists yet.
    """
    if database is not None:
        return database

    # No handle cached: connect using the environment-provided settings.
    return await connect_to_mongo()
77
+
78
+
79
async def close_mongo_connection() -> None:
    """
    Close the MongoDB client (if any) and clear the cached connection state.
    """
    global client, database, database_name

    active_client = client
    try:
        if active_client is None:
            # Nothing to close; the finally clause still clears the globals.
            return
        active_client.close()
        console.print(
            f"[bold yellow]MongoDB connection closed[/bold yellow] [dim](database: {database_name})[/dim]"
        )
    finally:
        client = None
        database = None
        database_name = None
@@ -0,0 +1,97 @@
1
+ import secrets
2
+ import string
3
+ from datetime import datetime, timezone
4
+
5
+ from fastapi import HTTPException
6
+ from rich.console import Console
7
+
8
+ from app.database.mongodb import get_database_connection
9
+ from app.models.project import ApiKeySchema, ProjectCreateResponse, ProjectSchema
10
+
11
+ console = Console()
12
+
13
+ PROJECTS_COLLECTION = "projects"
14
+ API_KEYS_COLLECTION = "api_keys"
15
+
16
+
17
+ def _utc_now() -> datetime:
18
+ return datetime.now(timezone.utc)
19
+
20
+
21
+ def _project_id(name: str) -> str:
22
+ base = "".join(char.lower() if char.isalnum() else "-" for char in name).strip("-")
23
+ compact = "-".join(segment for segment in base.split("-") if segment)
24
+ return compact or f"project-{secrets.token_hex(3)}"
25
+
26
+
27
def generate_api_key() -> str:
    """Return a new key: the "tlm_sk_" prefix plus 32 random ASCII letters/digits."""
    charset = string.ascii_letters + string.digits
    picks = [secrets.choice(charset) for _ in range(32)]
    return "tlm_sk_" + "".join(picks)
31
+
32
+
33
async def ensure_project_indexes() -> None:
    """Create the indexes used by the projects and API-key collections."""
    db = await get_database_connection()
    projects = db[PROJECTS_COLLECTION]
    api_keys = db[API_KEYS_COLLECTION]

    # Unique constraints: one project per id and per name.
    for field in ("project_id", "name"):
        await projects.create_index(field, unique=True)

    await api_keys.create_index("key", unique=True)
    # Plain (non-unique) lookup indexes on the key documents.
    for field in ("project_id", "environment"):
        await api_keys.create_index(field)
40
+
41
+
42
async def create_project(name: str, description: str, environment: str) -> ProjectCreateResponse:
    """
    Create a project together with its first API key.

    Raises:
        HTTPException: 409 when a project with the same id or name already exists.
    """
    db = await get_database_connection()
    projects = db[PROJECTS_COLLECTION]
    project_id = _project_id(name)

    # Reject duplicates by either the derived id or the exact name.
    duplicate_filter = {"$or": [{"project_id": project_id}, {"name": name}]}
    if await projects.find_one(duplicate_filter):
        raise HTTPException(status_code=409, detail="Project already exists")

    project = ProjectSchema(
        project_id=project_id,
        name=name,
        description=description,
        created_at=_utc_now(),
    )
    api_key = ApiKeySchema(
        key=generate_api_key(),
        project_id=project_id,
        environment=environment,
        created_at=_utc_now(),
    )

    project_document = project.model_dump(mode="python")
    await projects.insert_one(project_document)
    key_document = api_key.model_dump(mode="python")
    await db[API_KEYS_COLLECTION].insert_one(key_document)

    console.print(
        f"[bold green]Project created[/bold green] [dim]({project.project_id}, {environment})[/dim]"
    )
    return ProjectCreateResponse(project=project, api_key=api_key)
71
+
72
+
73
async def list_projects() -> list[ProjectSchema]:
    """Return up to 500 projects, oldest first, without Mongo's "_id" field."""
    db = await get_database_connection()
    cursor = db[PROJECTS_COLLECTION].find({}).sort("created_at", 1)
    documents = await cursor.to_list(length=500)

    projects: list[ProjectSchema] = []
    for document in documents:
        payload = dict(document)
        payload.pop("_id", None)
        projects.append(ProjectSchema.model_validate(payload))
    return projects
80
+
81
+
82
async def list_api_keys(project_id: str | None = None) -> list[ApiKeySchema]:
    """
    Return up to 500 API keys, newest first.

    When project_id is given (and non-empty) only that project's keys are returned.
    """
    db = await get_database_connection()
    query = {}
    if project_id:
        query = {"project_id": project_id}

    cursor = db[API_KEYS_COLLECTION].find(query).sort("created_at", -1)
    documents = await cursor.to_list(length=500)

    api_keys: list[ApiKeySchema] = []
    for document in documents:
        payload = dict(document)
        payload.pop("_id", None)
        api_keys.append(ApiKeySchema.model_validate(payload))
    return api_keys
90
+
91
+
92
async def get_project_by_api_key(api_key: str) -> ApiKeySchema:
    """
    Look up the stored API-key record matching the given key string.

    Raises:
        HTTPException: 404 when no record has that key.
    """
    db = await get_database_connection()
    document = await db[API_KEYS_COLLECTION].find_one({"key": api_key})
    if document:
        payload = dict(document)
        payload.pop("_id", None)
        return ApiKeySchema.model_validate(payload)
    raise HTTPException(status_code=404, detail="API key not found")
@@ -0,0 +1,417 @@
1
+ import asyncio
2
+ import logging
3
+ from collections import Counter, defaultdict
4
+ from datetime import datetime, timedelta, timezone
5
+ from math import ceil
6
+ from typing import Any, Optional
7
+
8
+ from bson import ObjectId
9
+ from fastapi import HTTPException
10
+ from rich.console import Console
11
+
12
+ from app.database.mongodb import get_database_connection
13
+ from app.models.trace import StepSchema, TraceSchema
14
+ from app.models.trace_model import (
15
+ AnalyticsBreakdownItem,
16
+ AnalyticsChartPoint,
17
+ AnalyticsResponse,
18
+ AnalyticsSummary,
19
+ FailureResponse,
20
+ TraceFilters,
21
+ TraceListResponse,
22
+ )
23
+ from app.websocket.socket import manager
24
+
25
+ console = Console()
26
+ logger = logging.getLogger(__name__)
27
+
28
+ COLLECTION_NAME = "traces"
29
+ SLOW_TRACE_THRESHOLD_MS = 1500.0
30
+
31
+
32
+ def _utc_now() -> datetime:
33
+ return datetime.now(timezone.utc)
34
+
35
+
36
+ def _coerce_datetime(value: Any) -> datetime:
37
+ if isinstance(value, datetime):
38
+ return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
39
+ if isinstance(value, str) and value:
40
+ parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
41
+ return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)
42
+ return _utc_now()
43
+
44
+
45
def _clean_document(document: dict[str, Any]) -> dict[str, Any]:
    """
    Copy a Mongo document without its "_id" key.

    When "_id" is an ObjectId its string form is stored under "id".
    The input document is not modified.
    """
    cleaned = dict(document)
    raw_id = cleaned.pop("_id", None)
    if isinstance(raw_id, ObjectId):
        cleaned["id"] = str(raw_id)
    return cleaned
50
+
51
+
52
def _normalize_steps(steps: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Validate raw step dicts through StepSchema and return them as plain dicts.

    Legacy key names ("input_data", "output_data", "step_type", "created_at")
    are accepted as fallbacks, and non-dict inputs/outputs are wrapped as
    {"value": ...}. Missing step ids become "step_<1-based position>".
    """

    def _as_mapping(payload: Any) -> dict[str, Any]:
        # StepSchema is given dict payloads; wrap anything else.
        return payload if isinstance(payload, dict) else {"value": payload}

    normalized: list[dict[str, Any]] = []
    for position, step in enumerate(steps, start=1):
        step_input = step.get("input") or step.get("input_data") or {}
        step_output = step.get("output") or step.get("output_data") or {}
        schema = StepSchema(
            step_id=step.get("step_id") or f"step_{position}",
            tool_name=step.get("tool_name") or step.get("step_type") or "agent",
            input=_as_mapping(step_input),
            output=_as_mapping(step_output),
            duration=float(step.get("duration", 0.0)),
            success=bool(step.get("success", True)),
            timestamp=_coerce_datetime(
                step.get("timestamp") or step.get("created_at") or _utc_now()
            ),
        )
        normalized.append(schema.model_dump(mode="python"))
    return normalized
71
+
72
+
73
+ def _infer_retry_count(steps: list[dict[str, Any]]) -> int:
74
+ retries = 0
75
+ tool_attempts: defaultdict[str, int] = defaultdict(int)
76
+ for step in steps:
77
+ tool_name = step.get("tool_name", "agent")
78
+ tool_attempts[tool_name] += 1
79
+ if tool_attempts[tool_name] > 1:
80
+ retries += 1
81
+ return retries
82
+
83
+
84
+ def _infer_status(trace_data: dict[str, Any], steps: list[dict[str, Any]]) -> str:
85
+ status = str(trace_data.get("status") or "").lower()
86
+ if status in {"success", "warning", "failed"}:
87
+ return status
88
+
89
+ if any(not step.get("success", True) for step in steps):
90
+ return "failed"
91
+ if trace_data.get("failure_reason") or trace_data.get("retry_count", 0):
92
+ return "warning"
93
+ return "success"
94
+
95
+
96
+ def _infer_failure_reason(trace_data: dict[str, Any], steps: list[dict[str, Any]]) -> Optional[str]:
97
+ failure_reason = trace_data.get("failure_reason")
98
+ if failure_reason:
99
+ return str(failure_reason)
100
+
101
+ for step in steps:
102
+ if not step.get("success", True):
103
+ output = step.get("output", {})
104
+ if isinstance(output, dict):
105
+ return str(output.get("error") or output.get("message") or step.get("tool_name"))
106
+ return str(step.get("tool_name"))
107
+ return None
108
+
109
+
110
def normalize_trace_document(trace_data: dict[str, Any]) -> dict[str, Any]:
    """
    Normalize a raw trace payload into the dict form stored in MongoDB.

    The payload is validated through TraceSchema. Missing values are filled
    with defaults or inferred from the steps (status, failure reason, retry
    count) and from the latency (slow-request flag). Keys that are present
    but set to None are treated the same as missing keys, matching how
    _serialize_traces reads stored documents.

    Args:
        trace_data: Raw trace payload; alternative key names ("id", "model",
            "timestamp") are accepted as fallbacks.

    Returns:
        The validated trace as a plain dict (TraceSchema.model_dump(mode="python")).
    """

    def _present(key: str, default: Any) -> Any:
        # An explicit None must not reach int()/float()/str(): the first two
        # raise TypeError and str() would store the literal text "None".
        raw = trace_data.get(key)
        return default if raw is None else raw

    created_at = _coerce_datetime(
        trace_data.get("created_at") or trace_data.get("timestamp") or _utc_now()
    )
    normalized_steps = _normalize_steps(trace_data.get("steps") or [])

    raw_retry_count = trace_data.get("retry_count")
    retry_count = (
        _infer_retry_count(normalized_steps) if raw_retry_count is None else int(raw_retry_count)
    )
    latency = float(_present("latency", 0.0))
    status = _infer_status(trace_data, normalized_steps)
    failure_reason = _infer_failure_reason(trace_data, normalized_steps)

    raw_slow_request = trace_data.get("slow_request")
    slow_request = (
        latency >= SLOW_TRACE_THRESHOLD_MS if raw_slow_request is None else bool(raw_slow_request)
    )
    prompt = str(_present("prompt", ""))
    response = trace_data.get("response")

    document = TraceSchema(
        trace_id=str(trace_data.get("trace_id") or trace_data.get("id") or ObjectId()),
        prompt=prompt,
        response=str(response) if response is not None else None,
        latency=latency,
        token_count=int(_present("token_count", 0)),
        model_name=trace_data.get("model_name") or trace_data.get("model") or "unknown",
        project_id=str(trace_data.get("project_id") or "default"),
        project_name=(
            None
            if trace_data.get("project_name") is None
            else str(trace_data.get("project_name"))
        ),
        api_key=(
            None if trace_data.get("api_key") is None else str(trace_data.get("api_key"))
        ),
        environment=str(trace_data.get("environment") or "development"),
        status=status,  # type: ignore[arg-type]
        steps=normalized_steps,
        retry_count=retry_count,
        slow_request=slow_request,
        failure_reason=failure_reason,
        created_at=created_at,
        updated_at=_utc_now(),
    )
    return document.model_dump(mode="python")
149
+
150
+
151
async def ensure_trace_indexes() -> None:
    """Create single-field indexes on the traces collection's filter/sort fields."""
    db = await get_database_connection()
    traces = db[COLLECTION_NAME]
    indexed_fields = (
        "trace_id",
        "created_at",
        "status",
        "model_name",
        "project_id",
        "environment",
    )
    for field in indexed_fields:
        await traces.create_index(field)
160
+
161
+
162
def _build_trace_query(filters: TraceFilters) -> dict[str, Any]:
    """
    Translate TraceFilters into a MongoDB filter document.

    Latency and token bounds become $gte/$lte ranges; status, model,
    project and environment become equality matches. Unset filters are omitted.
    """

    def _bounds(lower: Any, upper: Any) -> dict[str, Any]:
        # Build an inclusive range, skipping whichever side is not set.
        limits: dict[str, Any] = {}
        if lower is not None:
            limits["$gte"] = lower
        if upper is not None:
            limits["$lte"] = upper
        return limits

    query: dict[str, Any] = {}

    latency_range = _bounds(filters.latency_min, filters.latency_max)
    if latency_range:
        query["latency"] = latency_range

    token_range = _bounds(filters.token_min, filters.token_max)
    if token_range:
        query["token_count"] = token_range

    equality_filters = (
        ("status", filters.status),
        ("model_name", filters.model),
        ("project_id", filters.project_id),
        ("environment", filters.environment),
    )
    for field, wanted in equality_filters:
        if wanted:
            query[field] = wanted

    return query
191
+
192
+
193
def _serialize_traces(documents: list[dict[str, Any]]) -> list[TraceSchema]:
    """
    Convert stored Mongo documents into TraceSchema objects.

    Missing or None fields fall back to defaults, and the status is
    re-derived from the document and its steps.
    """

    def _optional_text(value: Any) -> Optional[str]:
        # Keep None as None; stringify everything else.
        return None if value is None else str(value)

    def _to_schema(document: dict[str, Any]) -> TraceSchema:
        cleaned = _clean_document(document)
        raw_response = cleaned.get("response")
        return TraceSchema(
            trace_id=str(cleaned.get("trace_id") or cleaned.get("id") or ObjectId()),
            prompt=str(cleaned.get("prompt") or ""),
            response="" if raw_response is None else str(raw_response),
            latency=float(cleaned.get("latency", 0.0) or 0.0),
            token_count=int(cleaned.get("token_count", 0) or 0),
            model_name=str(cleaned.get("model_name") or "unknown"),
            project_id=str(cleaned.get("project_id") or "default"),
            project_name=_optional_text(cleaned.get("project_name")),
            api_key=_optional_text(cleaned.get("api_key")),
            environment=str(cleaned.get("environment") or "development"),
            status=_infer_status(cleaned, cleaned.get("steps", [])),  # type: ignore[arg-type]
            steps=_normalize_steps(cleaned.get("steps", [])),
            retry_count=int(cleaned.get("retry_count", 0) or 0),
            slow_request=bool(cleaned.get("slow_request", False)),
            failure_reason=_optional_text(cleaned.get("failure_reason")),
            created_at=_coerce_datetime(cleaned.get("created_at") or _utc_now()),
            updated_at=_coerce_datetime(cleaned.get("updated_at") or _utc_now()),
        )

    return [_to_schema(document) for document in documents]
226
+
227
+
228
async def save_trace(trace_data: dict[str, Any]) -> dict[str, Any] | None:
    """
    Normalize and store a trace, then broadcast it to connected dashboards.

    Returns the stored document, or None when any step fails; failures are
    printed to the console rather than raised.
    """
    try:
        db = await get_database_connection()
        traces = db[COLLECTION_NAME]
        document = normalize_trace_document(trace_data)
        await traces.insert_one(document)
        console.print(
            f"[bold green]Trace saved to MongoDB[/bold green] [dim]({COLLECTION_NAME})[/dim]"
        )

        payload = TraceSchema.model_validate(document).model_dump(mode="json")
        await manager.broadcast({"type": "trace.created", "trace": payload})
    except Exception as error:
        console.print(f"[yellow]Trace persistence skipped:[/yellow] {error}")
        return None
    return document
244
+
245
+
246
+ def _handle_task_exception(task: asyncio.Task) -> None:
247
+ try:
248
+ task.result()
249
+ except Exception:
250
+ pass
251
+
252
+
253
# Strong references to in-flight background saves. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_BACKGROUND_SAVE_TASKS: set = set()


def save_trace_sync(trace_data: dict) -> None:
    """
    Save trace data safely from synchronous code.

    When called from inside a running event loop the save is scheduled as a
    background task and this function returns immediately. Otherwise the save
    runs to completion on a temporary event loop via asyncio.run().
    Errors are never raised to the caller.
    """
    try:
        # get_running_loop() only ever returns a loop that is running.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop is None:
        try:
            asyncio.run(save_trace(trace_data))
        except Exception:
            # Best-effort persistence: record the failure but do not propagate it.
            logger.debug("Synchronous trace save failed", exc_info=True)
        return

    task = loop.create_task(save_trace(trace_data))
    _BACKGROUND_SAVE_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_SAVE_TASKS.discard)
    task.add_done_callback(_handle_task_exception)
268
+
269
+
270
async def list_traces(filters: TraceFilters) -> TraceListResponse:
    """
    Return the newest traces matching the filters.

    "total" counts every matching document; "items" holds at most filters.limit of them.
    """
    db = await get_database_connection()
    traces = db[COLLECTION_NAME]
    query = _build_trace_query(filters)

    total = await traces.count_documents(query)
    cursor = traces.find(query).sort("created_at", -1).limit(filters.limit)
    documents = await cursor.to_list(filters.limit)

    return TraceListResponse(total=total, items=_serialize_traces(documents))
279
+
280
+
281
async def get_trace_by_id(trace_id: str) -> TraceSchema:
    """
    Fetch a single trace by its trace_id.

    Raises:
        HTTPException: 404 when no trace has that id.
    """
    db = await get_database_connection()
    document = await db[COLLECTION_NAME].find_one({"trace_id": trace_id})
    if document:
        return TraceSchema.model_validate(_clean_document(document))
    raise HTTPException(status_code=404, detail="Trace not found")
288
+
289
+
290
+ def _calculate_percentile(values: list[float], percentile: float) -> float:
291
+ if not values:
292
+ return 0.0
293
+ ordered = sorted(values)
294
+ index = max(0, ceil((percentile / 100) * len(ordered)) - 1)
295
+ return round(ordered[index], 2)
296
+
297
+
298
async def get_analytics(filters: TraceFilters | None = None) -> AnalyticsResponse:
    """
    Aggregate dashboard analytics over the traces matching the filters.

    Up to 5000 matching traces (oldest first) are loaded and summarized:
    totals, success rate, average and p95 latency, token usage, per-status /
    per-model / per-project counts, hourly chart points for the last 24 hours,
    and the five most recent problematic traces.

    Args:
        filters: Optional trace filters; defaults to an unfiltered TraceFilters().

    Returns:
        The analytics payload; every figure is zero/empty when nothing matches.
    """
    db = await get_database_connection()
    collection = db[COLLECTION_NAME]
    query = _build_trace_query(filters or TraceFilters())
    documents = await collection.find(query).sort("created_at", 1).to_list(length=5000)
    traces = _serialize_traces(documents)

    if not traces:
        empty_summary = AnalyticsSummary(
            total_traces=0,
            success_rate=0.0,
            average_latency=0.0,
            p95_latency=0.0,
            total_token_usage=0,
            failed_traces=0,
            warning_traces=0,
            retries=0,
            slow_requests=0,
        )
        return AnalyticsResponse(
            summary=empty_summary,
            charts=[],
            status_breakdown=[],
            model_breakdown=[],
            project_breakdown=[],
            recent_failures=[],
        )

    total_traces = len(traces)
    latencies = [trace.latency for trace in traces]
    total_token_usage = sum(trace.token_count for trace in traces)
    failed_traces = sum(1 for trace in traces if trace.status == "failed")
    warning_traces = sum(1 for trace in traces if trace.status == "warning")
    retries = sum(trace.retry_count for trace in traces)
    slow_requests = sum(1 for trace in traces if trace.slow_request)
    success_rate = round(
        (sum(1 for trace in traces if trace.status == "success") / total_traces) * 100,
        2,
    )

    # Bucket by the start of each UTC hour (a datetime, not its "%H:00" text).
    # Sorting the text labels would put e.g. "23:00" from yesterday after
    # "01:00" from today, and would merge the two partial hours at the edges
    # of the 24-hour window into one point.
    bucketed: dict[datetime, list[TraceSchema]] = defaultdict(list)
    window_start = _utc_now() - timedelta(hours=24)
    for trace in traces:
        if trace.created_at < window_start:
            continue
        hour_start = trace.created_at.astimezone(timezone.utc).replace(
            minute=0, second=0, microsecond=0
        )
        bucketed[hour_start].append(trace)

    # Chronological order; the first and last point may share a label when the
    # window starts and ends inside the same clock hour.
    chart_points = [
        AnalyticsChartPoint(
            label=hour_start.strftime("%H:00"),
            latency=round(sum(item.latency for item in items) / len(items), 2),
            tokens=sum(item.token_count for item in items),
            traces=len(items),
        )
        for hour_start, items in sorted(bucketed.items())
    ]

    status_breakdown = [
        AnalyticsBreakdownItem(key=key, count=count)
        for key, count in Counter(trace.status for trace in traces).most_common()
    ]
    model_breakdown = [
        AnalyticsBreakdownItem(key=key, count=count)
        for key, count in Counter(trace.model_name or "unknown" for trace in traces).most_common()
    ]
    project_breakdown = [
        AnalyticsBreakdownItem(key=key, count=count)
        for key, count in Counter(trace.project_name or trace.project_id for trace in traces).most_common()
    ]

    summary = AnalyticsSummary(
        total_traces=total_traces,
        success_rate=success_rate,
        average_latency=round(sum(latencies) / total_traces, 2),
        p95_latency=_calculate_percentile(latencies, 95),
        total_token_usage=total_token_usage,
        failed_traces=failed_traces,
        warning_traces=warning_traces,
        retries=retries,
        slow_requests=slow_requests,
    )

    # Newest first; "problematic" covers failed, warning, slow and retried traces.
    recent_failures = [
        trace
        for trace in sorted(traces, key=lambda item: item.created_at, reverse=True)
        if trace.status in {"failed", "warning"} or trace.slow_request or trace.retry_count > 0
    ][:5]

    return AnalyticsResponse(
        summary=summary,
        charts=chart_points,
        status_breakdown=status_breakdown,
        model_breakdown=model_breakdown,
        project_breakdown=project_breakdown,
        recent_failures=recent_failures,
    )
395
+
396
+
397
async def get_failures(limit: int = 25, filters: TraceFilters | None = None) -> FailureResponse:
    """
    Collect failed, retried and slow traces from the newest 1000 matches.

    Each returned list is capped at limit entries, while "totals" reports the
    uncapped counts within the scanned traces.
    """
    db = await get_database_connection()
    collection = db[COLLECTION_NAME]
    active_filters = filters or TraceFilters(limit=min(limit, 200))
    cursor = collection.find(_build_trace_query(active_filters)).sort("created_at", -1)
    traces = _serialize_traces(await cursor.to_list(length=1000))

    # Single pass: a trace may land in more than one category.
    failed: list[TraceSchema] = []
    retried: list[TraceSchema] = []
    slow: list[TraceSchema] = []
    for trace in traces:
        if trace.status == "failed":
            failed.append(trace)
        if trace.retry_count > 0:
            retried.append(trace)
        if trace.slow_request:
            slow.append(trace)

    return FailureResponse(
        failed_traces=failed[:limit],
        retries=retried[:limit],
        slow_requests=slow[:limit],
        totals={
            "failed_traces": len(failed),
            "retries": len(retried),
            "slow_requests": len(slow),
        },
    )