webagents 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webagents/__init__.py +18 -0
- webagents/agents/__init__.py +13 -0
- webagents/agents/core/__init__.py +19 -0
- webagents/agents/core/base_agent.py +1834 -0
- webagents/agents/core/handoffs.py +293 -0
- webagents/agents/handoffs/__init__.py +0 -0
- webagents/agents/interfaces/__init__.py +0 -0
- webagents/agents/lifecycle/__init__.py +0 -0
- webagents/agents/skills/__init__.py +109 -0
- webagents/agents/skills/base.py +136 -0
- webagents/agents/skills/core/__init__.py +8 -0
- webagents/agents/skills/core/guardrails/__init__.py +0 -0
- webagents/agents/skills/core/llm/__init__.py +0 -0
- webagents/agents/skills/core/llm/anthropic/__init__.py +1 -0
- webagents/agents/skills/core/llm/litellm/__init__.py +10 -0
- webagents/agents/skills/core/llm/litellm/skill.py +538 -0
- webagents/agents/skills/core/llm/openai/__init__.py +1 -0
- webagents/agents/skills/core/llm/xai/__init__.py +1 -0
- webagents/agents/skills/core/mcp/README.md +375 -0
- webagents/agents/skills/core/mcp/__init__.py +15 -0
- webagents/agents/skills/core/mcp/skill.py +731 -0
- webagents/agents/skills/core/memory/__init__.py +11 -0
- webagents/agents/skills/core/memory/long_term_memory/__init__.py +10 -0
- webagents/agents/skills/core/memory/long_term_memory/memory_skill.py +639 -0
- webagents/agents/skills/core/memory/short_term_memory/__init__.py +9 -0
- webagents/agents/skills/core/memory/short_term_memory/skill.py +341 -0
- webagents/agents/skills/core/memory/vector_memory/skill.py +447 -0
- webagents/agents/skills/core/planning/__init__.py +9 -0
- webagents/agents/skills/core/planning/planner.py +343 -0
- webagents/agents/skills/ecosystem/__init__.py +0 -0
- webagents/agents/skills/ecosystem/crewai/__init__.py +1 -0
- webagents/agents/skills/ecosystem/database/__init__.py +1 -0
- webagents/agents/skills/ecosystem/filesystem/__init__.py +0 -0
- webagents/agents/skills/ecosystem/google/__init__.py +0 -0
- webagents/agents/skills/ecosystem/google/calendar/__init__.py +6 -0
- webagents/agents/skills/ecosystem/google/calendar/skill.py +306 -0
- webagents/agents/skills/ecosystem/n8n/__init__.py +0 -0
- webagents/agents/skills/ecosystem/openai_agents/__init__.py +0 -0
- webagents/agents/skills/ecosystem/web/__init__.py +0 -0
- webagents/agents/skills/ecosystem/zapier/__init__.py +0 -0
- webagents/agents/skills/robutler/__init__.py +11 -0
- webagents/agents/skills/robutler/auth/README.md +63 -0
- webagents/agents/skills/robutler/auth/__init__.py +17 -0
- webagents/agents/skills/robutler/auth/skill.py +354 -0
- webagents/agents/skills/robutler/crm/__init__.py +18 -0
- webagents/agents/skills/robutler/crm/skill.py +368 -0
- webagents/agents/skills/robutler/discovery/README.md +281 -0
- webagents/agents/skills/robutler/discovery/__init__.py +16 -0
- webagents/agents/skills/robutler/discovery/skill.py +230 -0
- webagents/agents/skills/robutler/kv/__init__.py +6 -0
- webagents/agents/skills/robutler/kv/skill.py +80 -0
- webagents/agents/skills/robutler/message_history/__init__.py +9 -0
- webagents/agents/skills/robutler/message_history/skill.py +270 -0
- webagents/agents/skills/robutler/messages/__init__.py +0 -0
- webagents/agents/skills/robutler/nli/__init__.py +13 -0
- webagents/agents/skills/robutler/nli/skill.py +687 -0
- webagents/agents/skills/robutler/notifications/__init__.py +5 -0
- webagents/agents/skills/robutler/notifications/skill.py +141 -0
- webagents/agents/skills/robutler/payments/__init__.py +41 -0
- webagents/agents/skills/robutler/payments/exceptions.py +255 -0
- webagents/agents/skills/robutler/payments/skill.py +610 -0
- webagents/agents/skills/robutler/storage/__init__.py +10 -0
- webagents/agents/skills/robutler/storage/files/__init__.py +9 -0
- webagents/agents/skills/robutler/storage/files/skill.py +445 -0
- webagents/agents/skills/robutler/storage/json/__init__.py +9 -0
- webagents/agents/skills/robutler/storage/json/skill.py +336 -0
- webagents/agents/skills/robutler/storage/kv/skill.py +88 -0
- webagents/agents/skills/robutler/storage.py +389 -0
- webagents/agents/tools/__init__.py +0 -0
- webagents/agents/tools/decorators.py +426 -0
- webagents/agents/tracing/__init__.py +0 -0
- webagents/agents/workflows/__init__.py +0 -0
- webagents/api/__init__.py +17 -0
- webagents/api/client.py +1207 -0
- webagents/api/types.py +253 -0
- webagents/scripts/__init__.py +0 -0
- webagents/server/__init__.py +28 -0
- webagents/server/context/__init__.py +0 -0
- webagents/server/context/context_vars.py +121 -0
- webagents/server/core/__init__.py +0 -0
- webagents/server/core/app.py +843 -0
- webagents/server/core/middleware.py +69 -0
- webagents/server/core/models.py +98 -0
- webagents/server/core/monitoring.py +59 -0
- webagents/server/endpoints/__init__.py +0 -0
- webagents/server/interfaces/__init__.py +0 -0
- webagents/server/middleware.py +330 -0
- webagents/server/models.py +92 -0
- webagents/server/monitoring.py +659 -0
- webagents/utils/__init__.py +0 -0
- webagents/utils/logging.py +359 -0
- webagents-0.1.12.dist-info/METADATA +99 -0
- webagents-0.1.12.dist-info/RECORD +96 -0
- webagents-0.1.12.dist-info/WHEEL +4 -0
- webagents-0.1.12.dist-info/entry_points.txt +2 -0
- webagents-0.1.12.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,659 @@
|
|
1
|
+
"""
|
2
|
+
Monitoring & Observability - Robutler V2.0
|
3
|
+
|
4
|
+
Comprehensive monitoring system with Prometheus metrics, structured logging,
|
5
|
+
and request tracing for production deployments.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import time
|
9
|
+
import json
|
10
|
+
import logging
|
11
|
+
import asyncio
|
12
|
+
from typing import Dict, Any, Optional, List, Counter as CounterType
|
13
|
+
from datetime import datetime, timedelta
|
14
|
+
from dataclasses import dataclass, field
|
15
|
+
from contextvars import ContextVar
|
16
|
+
from collections import defaultdict, Counter
|
17
|
+
|
18
|
+
try:
    from prometheus_client import (
        Counter, Histogram, Gauge, Info, CollectorRegistry,
        generate_latest, CONTENT_TYPE_LATEST
    )
    PROMETHEUS_AVAILABLE = True
except ImportError:
    PROMETHEUS_AVAILABLE = False

    # No-op stand-ins used when prometheus_client is not installed.
    # Every constructor and method accepts arbitrary arguments and
    # discards them, so call sites do not need to branch on availability.
    # NOTE: like the real import above, this rebinds the name ``Counter``,
    # which this file also imports from ``collections``.
    class _NoOpMetric:
        """Shared base for the stand-in metric types."""

        def __init__(self, *args, **kwargs):
            pass

        def labels(self, *args, **kwargs):
            # prometheus_client accepts label values positionally or by
            # keyword; accept both so either calling style works here too.
            return self

    class Counter(_NoOpMetric):
        """Stand-in counter: ``inc`` does nothing."""

        def inc(self, *args, **kwargs):
            pass

    class Histogram(_NoOpMetric):
        """Stand-in histogram: ``observe`` does nothing, ``time()`` is a no-op context manager."""

        def observe(self, *args, **kwargs):
            pass

        def time(self):
            # Returning self lets ``with histogram.time():`` work.
            return self

        def __enter__(self):
            return self

        def __exit__(self, *args):
            # Returns None, so exceptions raised in the ``with`` body propagate.
            pass

    class Gauge(_NoOpMetric):
        """Stand-in gauge: ``set``/``inc``/``dec`` do nothing."""

        def set(self, *args, **kwargs):
            pass

        def inc(self, *args, **kwargs):
            pass

        def dec(self, *args, **kwargs):
            pass

    class Info(_NoOpMetric):
        """Stand-in info metric: ``info`` does nothing."""

        def info(self, *args, **kwargs):
            pass

    def generate_latest(*args, **kwargs):
        """Return a placeholder exposition payload."""
        return b"# Prometheus not available\n"

    CONTENT_TYPE_LATEST = "text/plain"
    # There is no registry type to instantiate in fallback mode.
    CollectorRegistry = None
|
72
|
+
|
73
|
+
|
74
|
+
@dataclass
class RequestMetrics:
    """Timing, outcome and usage figures recorded for a single request."""
    request_id: str
    method: str
    path: str
    agent_name: Optional[str]
    start_time: float
    end_time: Optional[float] = None
    status_code: Optional[int] = None
    error: Optional[str] = None
    duration_ms: Optional[float] = None
    tokens_used: int = 0
    stream: bool = False

    def finish(self, status_code: int, error: Optional[str] = None):
        """Stamp the end time and outcome, and derive the duration.

        Args:
            status_code: Status code to store on the record.
            error: Optional error text to store on the record.
        """
        finished_at = time.time()
        self.end_time = finished_at
        self.status_code = status_code
        self.error = error
        # Both timestamps are in seconds; the stored duration is milliseconds.
        self.duration_ms = (finished_at - self.start_time) * 1000

    def to_dict(self) -> Dict[str, Any]:
        """Return the loggable fields plus an ISO ``timestamp`` of the start time."""
        exported = (
            "request_id",
            "method",
            "path",
            "agent_name",
            "duration_ms",
            "status_code",
            "error",
            "tokens_used",
            "stream",
        )
        payload: Dict[str, Any] = {name: getattr(self, name) for name in exported}
        payload["timestamp"] = datetime.fromtimestamp(self.start_time).isoformat()
        return payload
|
110
|
+
|
111
|
+
|
112
|
+
class PrometheusMetrics:
    """Prometheus metrics collection for Robutler server.

    Creates the ``robutler_*`` counters, histograms, gauges and info metric,
    and exposes one ``record_*`` / ``update_*`` / ``set_*`` method per
    measurement. Every recording method returns immediately when
    ``PROMETHEUS_AVAILABLE`` is false.
    """

    def __init__(self, registry: CollectorRegistry = None):
        """Create all metric objects.

        Args:
            registry: Passed unchanged as ``registry=`` to every metric
                constructor and stored on ``self.registry``.
        """
        self.registry = registry

        if not PROMETHEUS_AVAILABLE:
            # Do not return here: in this mode Counter/Histogram/Gauge/Info
            # are the module's no-op stand-ins, so building them is harmless
            # and guarantees the attributes below always exist.
            logging.warning("Prometheus client not available - metrics will be mocked")

        # HTTP Request metrics
        self.http_requests_total = Counter(
            'robutler_http_requests_total',
            'Total HTTP requests',
            ['method', 'path', 'status_code', 'agent_name'],
            registry=registry
        )

        self.http_request_duration = Histogram(
            'robutler_http_request_duration_seconds',
            'HTTP request duration',
            ['method', 'path', 'agent_name'],
            registry=registry
        )

        self.http_requests_in_progress = Gauge(
            'robutler_http_requests_in_progress',
            'HTTP requests currently in progress',
            ['method', 'path', 'agent_name'],
            registry=registry
        )

        # Agent metrics
        self.agent_requests_total = Counter(
            'robutler_agent_requests_total',
            'Total requests per agent',
            ['agent_name', 'stream'],
            registry=registry
        )

        self.agent_request_duration = Histogram(
            'robutler_agent_request_duration_seconds',
            'Agent request processing duration',
            ['agent_name', 'stream'],
            registry=registry
        )

        self.agent_errors_total = Counter(
            'robutler_agent_errors_total',
            'Total agent processing errors',
            ['agent_name', 'error_type'],
            registry=registry
        )

        # Token usage metrics
        self.tokens_used_total = Counter(
            'robutler_tokens_used_total',
            'Total tokens used',
            ['agent_name', 'model'],
            registry=registry
        )

        self.credits_spent_total = Counter(
            'robutler_credits_spent_total',
            'Total credits spent',
            ['agent_name', 'user_id'],
            registry=registry
        )

        # System metrics
        self.active_agents = Gauge(
            'robutler_active_agents',
            'Number of active agents',
            registry=registry
        )

        self.dynamic_agents_cache_size = Gauge(
            'robutler_dynamic_agents_cache_size',
            'Dynamic agents cache size',
            registry=registry
        )

        self.rate_limit_exceeded_total = Counter(
            'robutler_rate_limit_exceeded_total',
            'Total rate limit violations',
            ['client_type', 'limit_type'],
            registry=registry
        )

        # Server info
        self.server_info = Info(
            'robutler_server_info',
            'Server information',
            registry=registry
        )

    @staticmethod
    def _http_labels(method: str, path: str, agent_name: Optional[str]) -> Dict[str, str]:
        """Build the label set shared by the three HTTP metrics."""
        return {
            "method": method,
            "path": path,
            # A missing or empty agent name is reported as "unknown".
            "agent_name": agent_name or "unknown"
        }

    def record_http_request_start(self, method: str, path: str, agent_name: Optional[str] = None):
        """Increment the in-progress gauge for this method/path/agent."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.http_requests_in_progress.labels(
            **self._http_labels(method, path, agent_name)
        ).inc()

    def record_http_request_finish(
        self,
        method: str,
        path: str,
        status_code: int,
        duration: float,
        agent_name: Optional[str] = None
    ):
        """Record completion of an HTTP request.

        Increments the request counter (labelled with the stringified status
        code), observes ``duration`` on the duration histogram, and
        decrements the in-progress gauge.
        """
        if not PROMETHEUS_AVAILABLE:
            return

        labels = self._http_labels(method, path, agent_name)

        # Record request completion
        self.http_requests_total.labels(
            **labels,
            status_code=str(status_code)
        ).inc()

        # Record duration
        self.http_request_duration.labels(**labels).observe(duration)

        # Decrement in-progress counter; the labels must match those used at
        # start for the gauge to return to its previous value.
        self.http_requests_in_progress.labels(**labels).dec()

    def record_agent_request(
        self,
        agent_name: str,
        duration: float,
        stream: bool = False,
        error: Optional[str] = None
    ):
        """Record agent request completion.

        Counts the request, observes its duration, and, when ``error`` is
        truthy, increments the error counter with ``error`` as the
        ``error_type`` label.
        """
        if not PROMETHEUS_AVAILABLE:
            return

        # The label value is "true"/"false" for a bool ``stream``.
        stream_label = str(stream).lower()

        # Count request
        self.agent_requests_total.labels(
            agent_name=agent_name,
            stream=stream_label
        ).inc()

        # Record duration
        self.agent_request_duration.labels(
            agent_name=agent_name,
            stream=stream_label
        ).observe(duration)

        # Record error if any
        if error:
            self.agent_errors_total.labels(
                agent_name=agent_name,
                error_type=error
            ).inc()

    def record_token_usage(self, agent_name: str, model: str, tokens: int):
        """Add ``tokens`` to the token counter for this agent/model."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.tokens_used_total.labels(
            agent_name=agent_name,
            model=model
        ).inc(tokens)

    def record_credit_usage(self, agent_name: str, user_id: str, credits: float):
        """Add ``credits`` to the credit counter for this agent/user."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.credits_spent_total.labels(
            agent_name=agent_name,
            user_id=user_id
        ).inc(credits)

    def update_active_agents(self, count: int):
        """Set the active-agents gauge to ``count``."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.active_agents.set(count)

    def update_dynamic_cache_size(self, size: int):
        """Set the dynamic-agents cache-size gauge to ``size``."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.dynamic_agents_cache_size.set(size)

    def record_rate_limit_exceeded(self, client_type: str, limit_type: str):
        """Increment the rate-limit violation counter for this client/limit type."""
        if not PROMETHEUS_AVAILABLE:
            return

        self.rate_limit_exceeded_total.labels(
            client_type=client_type,
            limit_type=limit_type
        ).inc()

    def set_server_info(self, version: str, agents_count: int, **kwargs):
        """Publish server information on the info metric.

        ``agents_count`` and every extra keyword value are converted with
        ``str()`` before being passed on.
        """
        if not PROMETHEUS_AVAILABLE:
            return

        info_dict = {
            "version": version,
            "agents_count": str(agents_count),
            **{k: str(v) for k, v in kwargs.items()}
        }

        self.server_info.info(info_dict)
|
335
|
+
|
336
|
+
|
337
|
+
class StructuredLogger:
    """Thin wrapper that emits each log call as a single JSON string.

    Each ``info``/``warning``/``error``/``debug`` call serializes the message
    and its keyword arguments to JSON and hands that string to the wrapped
    ``logging.Logger``.
    """

    def __init__(self, name: str = "webagents", level: int = logging.INFO):
        """Fetch the named logger, set its level and ensure it has a handler.

        Args:
            name: Name passed to ``logging.getLogger``.
            level: Level set on that logger.
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        # Console handler with JSON formatting. Only attached when the logger
        # has no handlers yet, so constructing this wrapper repeatedly for
        # the same name does not stack handlers.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(JsonFormatter())
            self.logger.addHandler(handler)

    def info(self, message: str, **kwargs):
        """Log info message with structured data"""
        self._log(logging.INFO, message, **kwargs)

    def warning(self, message: str, **kwargs):
        """Log warning message with structured data"""
        self._log(logging.WARNING, message, **kwargs)

    def error(self, message: str, **kwargs):
        """Log error message with structured data"""
        self._log(logging.ERROR, message, **kwargs)

    def debug(self, message: str, **kwargs):
        """Log debug message with structured data"""
        self._log(logging.DEBUG, message, **kwargs)

    def _log(self, level: int, message: str, **kwargs):
        """Serialize ``message`` plus ``kwargs`` to JSON and log it at ``level``.

        Keyword arguments are merged after ``message`` and ``timestamp``, so
        a caller-supplied key of the same name replaces the default.
        """
        # Skip building and serializing the payload for disabled levels.
        if not self.logger.isEnabledFor(level):
            return

        log_data = {
            "message": message,
            "timestamp": datetime.utcnow().isoformat(),
            **kwargs
        }

        # default=str is deliberate: callers pass arbitrary context values
        # (datetimes, exceptions, ...) and a log call should degrade to the
        # value's string form rather than raise TypeError.
        self.logger.log(level, json.dumps(log_data, default=str))
|
387
|
+
|
388
|
+
|
389
|
+
class JsonFormatter(logging.Formatter):
    """Formatter that renders every log record as one JSON object."""

    def format(self, record):
        """Return ``record`` as a JSON string.

        A message that is itself a JSON object is used as the base payload;
        any other message is stored under ``"message"``. The fields
        ``timestamp``, ``level``, ``logger``, ``module`` and ``line`` are
        then set from the record (replacing same-named keys in the payload),
        and ``exception`` is added when the record carries exception info.
        """
        # getMessage() applies %-style arguments, so
        # logger.info("x=%s", 5) is rendered as "x=5" rather than "x=%s".
        try:
            message = record.getMessage()
        except (TypeError, ValueError, KeyError):
            # Arguments do not match the format string; keep the raw text.
            message = str(record.msg)

        # Try to parse the message as JSON (if it's already structured)
        log_data = None
        if message.startswith('{'):
            try:
                parsed = json.loads(message)
            except ValueError:  # json.JSONDecodeError is a ValueError
                parsed = None
            if isinstance(parsed, dict):
                log_data = parsed
        if log_data is None:
            log_data = {"message": message}

        # Add standard fields
        log_data.update({
            "timestamp": datetime.fromtimestamp(record.created).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "module": record.module,
            "line": record.lineno
        })

        # Add exception info if present
        if record.exc_info:
            log_data["exception"] = self.formatException(record.exc_info)

        # default=str keeps formatting from raising on non-JSON values.
        return json.dumps(log_data, default=str)
|
416
|
+
|
417
|
+
|
418
|
+
class MonitoringSystem:
    """Coordinator that ties request tracking to metrics and logging.

    Tracks in-flight requests by id, keeps a bounded history of finished
    requests, forwards measurements to a ``PrometheusMetrics`` instance and,
    when enabled, writes structured log entries.
    """

    def __init__(
        self,
        enable_prometheus: bool = True,
        enable_structured_logging: bool = True,
        metrics_port: int = 9090
    ):
        """Set up metrics, logging and request-tracking state.

        Args:
            enable_prometheus: Request Prometheus metrics; only honoured when
                ``PROMETHEUS_AVAILABLE`` is true.
            enable_structured_logging: Create a ``StructuredLogger`` named
                ``"robutler.monitoring"``; otherwise ``self.logger`` is None.
            metrics_port: Stored on the instance; not otherwise used by this
                class.
        """
        self.enable_prometheus = enable_prometheus and PROMETHEUS_AVAILABLE
        self.enable_structured_logging = enable_structured_logging
        self.metrics_port = metrics_port

        # Initialize components
        if self.enable_prometheus:
            self.registry = CollectorRegistry()
            self.prometheus = PrometheusMetrics(self.registry)
        else:
            self.registry = None
            self.prometheus = PrometheusMetrics(None)  # Mock metrics

        if self.enable_structured_logging:
            self.logger = StructuredLogger("robutler.monitoring")
        else:
            self.logger = None

        # Request tracking
        self.active_requests: Dict[str, RequestMetrics] = {}
        self.recent_requests: List[RequestMetrics] = []
        self.request_history_limit = 1000

        # Performance tracking
        self.performance_stats = {
            "total_requests": 0,
            "total_errors": 0,
            "avg_response_time": 0.0,
            "requests_per_minute": 0
        }

        self.last_stats_update = time.time()

    def start_request(
        self,
        request_id: str,
        method: str,
        path: str,
        agent_name: Optional[str] = None,
        **kwargs
    ) -> RequestMetrics:
        """Start tracking a request.

        Registers a new ``RequestMetrics`` under ``request_id``, bumps the
        in-progress gauge and logs the start. Extra keyword arguments are
        only forwarded to the log entry.

        Returns:
            The newly created ``RequestMetrics``.
        """
        metrics = RequestMetrics(
            request_id=request_id,
            method=method,
            path=path,
            agent_name=agent_name,
            start_time=time.time()
        )

        self.active_requests[request_id] = metrics

        # Record in Prometheus
        self.prometheus.record_http_request_start(method, path, agent_name)

        # Log request start
        if self.logger:
            self.logger.info(
                "Request started",
                request_id=request_id,
                method=method,
                path=path,
                agent_name=agent_name,
                **kwargs
            )

        return metrics

    def finish_request(
        self,
        request_id: str,
        status_code: int,
        error: Optional[str] = None,
        tokens_used: int = 0,
        **kwargs
    ):
        """Finish tracking a request.

        Does nothing when ``request_id`` is not currently tracked. Otherwise
        finalizes the record, reports it to Prometheus, logs it, appends it
        to the bounded history and updates the lifetime counters. Extra
        keyword arguments are only forwarded to the log entry.
        """
        metrics = self.active_requests.pop(request_id, None)
        if metrics is None:
            return

        metrics.finish(status_code, error)
        metrics.tokens_used = tokens_used

        duration_seconds = metrics.duration_ms / 1000  # Convert to seconds

        # Record in Prometheus
        self.prometheus.record_http_request_finish(
            metrics.method,
            metrics.path,
            status_code,
            duration_seconds,
            metrics.agent_name
        )

        # Record agent-specific metrics
        if metrics.agent_name:
            self.prometheus.record_agent_request(
                metrics.agent_name,
                duration_seconds,
                metrics.stream,
                error
            )

        # Log request completion
        if self.logger:
            # Merge into one dict first: splatting both mappings directly
            # raises TypeError when a caller-supplied key (e.g. "error")
            # also appears in the record's own fields. Caller values win.
            fields = {**metrics.to_dict(), **kwargs}
            self.logger.info("Request completed", **fields)

        # Add to recent requests history, dropping the oldest entries once
        # the limit is exceeded.
        self.recent_requests.append(metrics)
        excess = len(self.recent_requests) - self.request_history_limit
        if excess > 0:
            del self.recent_requests[:excess]

        # Update performance stats
        self._update_performance_stats(metrics)

    def record_token_usage(self, agent_name: str, model: str, tokens: int):
        """Forward token usage to Prometheus and log it."""
        self.prometheus.record_token_usage(agent_name, model, tokens)

        if self.logger:
            self.logger.info(
                "Token usage recorded",
                agent_name=agent_name,
                model=model,
                tokens=tokens
            )

    def record_credit_usage(self, agent_name: str, user_id: str, credits: float):
        """Forward credit spending to Prometheus and log it."""
        self.prometheus.record_credit_usage(agent_name, user_id, credits)

        if self.logger:
            self.logger.info(
                "Credit usage recorded",
                agent_name=agent_name,
                user_id=user_id,
                credits=credits
            )

    def update_system_metrics(
        self,
        active_agents: int,
        dynamic_cache_size: Optional[int] = None
    ):
        """Update the active-agents gauge and, when given, the cache-size gauge."""
        self.prometheus.update_active_agents(active_agents)

        if dynamic_cache_size is not None:
            self.prometheus.update_dynamic_cache_size(dynamic_cache_size)

    def get_metrics_response(self) -> bytes:
        """Return the exposition payload for this instance's registry.

        When Prometheus is not enabled a fixed placeholder is returned.
        """
        if not self.enable_prometheus:
            return b"# Prometheus metrics not enabled\n"

        return generate_latest(self.registry)

    def get_performance_stats(self) -> Dict[str, Any]:
        """Return a snapshot of request statistics.

        ``total_requests`` and ``total_errors`` are lifetime counters.
        ``requests_last_minute``, ``average_response_time_ms`` and
        ``error_rate`` are derived from the bounded history and therefore
        cover at most ``request_history_limit`` finished requests.
        """
        now = time.time()

        # Calculate requests per minute
        minute_ago = now - 60
        requests_last_minute = sum(
            1 for r in self.recent_requests if r.start_time > minute_ago
        )

        return {
            # Use the lifetime counter: the history list is capped, so its
            # length stops growing once the limit is reached.
            "total_requests": self.performance_stats["total_requests"],
            "total_errors": self.performance_stats["total_errors"],
            "requests_last_minute": requests_last_minute,
            "active_requests": len(self.active_requests),
            "average_response_time_ms": self._calculate_avg_response_time(),
            "error_rate": self._calculate_error_rate(),
            "last_updated": now
        }

    def _update_performance_stats(self, metrics: RequestMetrics):
        """Bump the lifetime request counter and, on error, the error counter."""
        self.performance_stats["total_requests"] += 1

        if metrics.error:
            self.performance_stats["total_errors"] += 1

    def _calculate_avg_response_time(self) -> float:
        """Mean ``duration_ms`` over the last 100 finished requests (0.0 if none)."""
        window = self.recent_requests[-100:]
        if not window:
            return 0.0

        # A missing duration counts as 0.
        return sum(r.duration_ms or 0 for r in window) / len(window)

    def _calculate_error_rate(self) -> float:
        """Fraction of the last 100 finished requests that carry an error (0.0 if none)."""
        window = self.recent_requests[-100:]
        if not window:
            return 0.0

        # Truthiness matches how errors are counted in
        # _update_performance_stats.
        errors = sum(1 for r in window if r.error)

        return errors / len(window)
|
634
|
+
|
635
|
+
|
636
|
+
# Global monitoring instance (initialized by server)
|
637
|
+
# Starts as None; initialize_monitoring() rebinds it and
# get_monitoring_system() returns whatever is currently bound.
monitoring_system: Optional[MonitoringSystem] = None
|
638
|
+
|
639
|
+
|
640
|
+
def get_monitoring_system() -> Optional[MonitoringSystem]:
    """Return whatever the module-level ``monitoring_system`` currently holds."""
    current = monitoring_system
    return current
|
643
|
+
|
644
|
+
|
645
|
+
def initialize_monitoring(
    enable_prometheus: bool = True,
    enable_structured_logging: bool = True,
    metrics_port: int = 9090
) -> MonitoringSystem:
    """Build a ``MonitoringSystem``, publish it as the module global and return it.

    All three arguments are forwarded by keyword to the ``MonitoringSystem``
    constructor. Each call replaces the instance stored in
    ``monitoring_system``.
    """
    global monitoring_system

    settings = {
        "enable_prometheus": enable_prometheus,
        "enable_structured_logging": enable_structured_logging,
        "metrics_port": metrics_port,
    }
    instance = MonitoringSystem(**settings)
    monitoring_system = instance

    return instance
|
File without changes
|