sql-xel-parser 1.0.0 (sql_xel_parser-1.0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_xel_parser/__init__.py +14 -0
- sql_xel_parser/__main__.py +6 -0
- sql_xel_parser/analyzer.py +380 -0
- sql_xel_parser/cli.py +315 -0
- sql_xel_parser/converter.py +284 -0
- sql_xel_parser/parser.py +379 -0
- sql_xel_parser/real_parser.py +295 -0
- sql_xel_parser-1.0.0.dist-info/METADATA +139 -0
- sql_xel_parser-1.0.0.dist-info/RECORD +13 -0
- sql_xel_parser-1.0.0.dist-info/WHEEL +5 -0
- sql_xel_parser-1.0.0.dist-info/entry_points.txt +2 -0
- sql_xel_parser-1.0.0.dist-info/licenses/LICENSE +21 -0
- sql_xel_parser-1.0.0.dist-info/top_level.txt +1 -0
sql_xel_parser/__init__.py
@@ -0,0 +1,14 @@
+"""
+XEL Parser - Parse and analyze SQL Server Extended Events files.
+
+A Python package for parsing and analyzing SQL Server Extended Events (.xel) files
+without requiring SQL Server.
+"""
+
+from .parser import XELParser
+from .converter import XELConverter
+from .analyzer import XELAnalyzer
+from .real_parser import parse_xel_file, extract_real_data_from_xel
+
+__version__ = '1.0.0'
+__all__ = ['XELParser', 'XELConverter', 'XELAnalyzer', 'parse_xel_file', 'extract_real_data_from_xel']
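For orientation, a minimal sketch exercising the exported surface. Only the names and __version__ above are confirmed by this diff; the signatures behind XELParser, XELConverter, parse_xel_file, and extract_real_data_from_xel live in the other modules listed in the file table.

import sql_xel_parser

print(sql_xel_parser.__version__)   # '1.0.0'
print(sql_xel_parser.__all__)       # the five public names exported above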
sql_xel_parser/analyzer.py
@@ -0,0 +1,380 @@
+"""
+XEL Analyzer - Advanced parsing, filtering, and analysis of XEL events.
+
+Provides utilities for:
+- Filtering events by various criteria
+- Searching event content
+- Aggregating and grouping events
+- Statistical analysis
+"""
+
+import re
+from typing import List, Dict, Any, Callable, Optional
+from datetime import datetime
+from collections import defaultdict
+
+
+class XELAnalyzer:
+    """Analyzer for XEL events with filtering and aggregation capabilities."""
+
+    def __init__(self, events: List[Dict[str, Any]]):
+        """
+        Initialize analyzer with events.
+
+        Args:
+            events: List of parsed event dictionaries
+        """
+        self.events = events
+
+    def filter_by_name(self, name_pattern: str) -> 'XELAnalyzer':
+        """
+        Filter events by name pattern.
+
+        Args:
+            name_pattern: Regex pattern or exact name
+
+        Returns:
+            New XELAnalyzer with filtered events
+        """
+        try:
+            pattern = re.compile(name_pattern, re.IGNORECASE)
+            filtered = [e for e in self.events if pattern.search(e.get('name', ''))]
+        except re.error:
+            # If the regex fails to compile, fall back to an exact match
+            filtered = [e for e in self.events if e.get('name', '') == name_pattern]
+
+        return XELAnalyzer(filtered)
+
+    def filter_by_time_range(self, start_time: Optional[str] = None,
+                             end_time: Optional[str] = None) -> 'XELAnalyzer':
+        """
+        Filter events by time range.
+
+        Args:
+            start_time: Start timestamp (ISO format or parseable string)
+            end_time: End timestamp (ISO format or parseable string)
+
+        Returns:
+            New XELAnalyzer with filtered events
+        """
+        filtered = []
+        for event in self.events:
+            timestamp = event.get('timestamp', '')
+            if not timestamp:
+                continue
+
+            # Convert timestamp to comparable format
+            try:
+                event_time = self._parse_timestamp(timestamp)
+            except Exception:
+                continue
+
+            # Check range; an unparseable bound is ignored rather than fatal
+            if start_time:
+                try:
+                    start = self._parse_timestamp(start_time)
+                    if event_time < start:
+                        continue
+                except Exception:
+                    pass
+
+            if end_time:
+                try:
+                    end = self._parse_timestamp(end_time)
+                    if event_time > end:
+                        continue
+                except Exception:
+                    pass
+
+            filtered.append(event)
+
+        return XELAnalyzer(filtered)
+
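To make the filtering API concrete, a usage sketch (not part of the packaged file; the event dicts and field names are invented, only the constructor and method signatures come from this module):

from sql_xel_parser import XELAnalyzer

events = [
    {'name': 'sql_batch_completed',
     'timestamp': '2024-01-01 12:00:00',
     'data': {'duration': 2500000, 'database_name': 'Sales'},
     'actions': {'session_id': 55}},
    {'name': 'error_reported',
     'timestamp': '2024-01-01 12:05:00',
     'data': {'severity': 16},
     'actions': {}},
]

analyzer = XELAnalyzer(events)
jan_batches = (analyzer
               .filter_by_name('sql_batch_completed')
               .filter_by_time_range(start_time='2024-01-01 00:00:00',
                                     end_time='2024-01-02 00:00:00'))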
+    def filter_by_field(self, field_path: str, value: Any = None,
+                        operator: str = 'equals') -> 'XELAnalyzer':
+        """
+        Filter events by field value.
+
+        Args:
+            field_path: Field path (e.g., 'data.duration', 'actions.session_id')
+            value: Value to compare (None means field exists)
+            operator: Comparison operator (equals, contains, gt, lt, gte, lte, exists)
+
+        Returns:
+            New XELAnalyzer with filtered events
+        """
+        filtered = []
+        for event in self.events:
+            field_value = self._get_nested_value(event, field_path)
+
+            if operator == 'exists':
+                if field_value is not None:
+                    filtered.append(event)
+            elif operator == 'equals':
+                if field_value == value:
+                    filtered.append(event)
+            elif operator == 'contains':
+                if value and field_value and str(value).lower() in str(field_value).lower():
+                    filtered.append(event)
+            elif operator == 'gt':
+                try:
+                    if field_value is not None and field_value > value:
+                        filtered.append(event)
+                except TypeError:
+                    pass
+            elif operator == 'lt':
+                try:
+                    if field_value is not None and field_value < value:
+                        filtered.append(event)
+                except TypeError:
+                    pass
+            elif operator == 'gte':
+                try:
+                    if field_value is not None and field_value >= value:
+                        filtered.append(event)
+                except TypeError:
+                    pass
+            elif operator == 'lte':
+                try:
+                    if field_value is not None and field_value <= value:
+                        filtered.append(event)
+                except TypeError:
+                    pass
+
+        return XELAnalyzer(filtered)
+
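Continuing the sketch above, filter_by_field walks dotted paths into the nested 'data' and 'actions' dicts. The duration unit here is an assumption; it depends on what the traced event records:

# Batches slower than ~2s, assuming microsecond durations.
slow = analyzer.filter_by_field('data.duration', 2000000, operator='gt')
# Events that captured a session_id action at all.
with_session = analyzer.filter_by_field('actions.session_id', operator='exists')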
+    def search(self, query: str, fields: Optional[List[str]] = None) -> 'XELAnalyzer':
+        """
+        Search for events containing query string.
+
+        Args:
+            query: Search query (regex supported)
+            fields: Specific fields to search (None = search all)
+
+        Returns:
+            New XELAnalyzer with matching events
+        """
+        try:
+            pattern = re.compile(query, re.IGNORECASE)
+        except re.error:
+            # If regex fails, use literal string
+            pattern = None
+
+        filtered = []
+        for event in self.events:
+            if self._search_in_dict(event, pattern or query, fields):
+                filtered.append(event)
+
+        return XELAnalyzer(filtered)
+
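search() tries the query as a case-insensitive regex and silently falls back to a literal substring match, so both calls below are safe. Note that it matches stringified values only, never key names:

hits = analyzer.search('error')                        # matches 'error_reported' by value
names = analyzer.search(r'^sql_\w+', fields=['name'])  # regex, restricted to one field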
+    def group_by(self, field_path: str) -> Dict[str, List[Dict[str, Any]]]:
+        """
+        Group events by field value.
+
+        Args:
+            field_path: Field path to group by
+
+        Returns:
+            Dictionary mapping field values to event lists
+        """
+        groups = defaultdict(list)
+        for event in self.events:
+            value = self._get_nested_value(event, field_path)
+            key = str(value) if value is not None else 'null'
+            groups[key].append(event)
+
+        return dict(groups)
+
+    def aggregate(self, field_path: str, operation: str = 'count') -> Dict[str, Any]:
+        """
+        Aggregate events by field.
+
+        Args:
+            field_path: Field path to aggregate
+            operation: Aggregation operation (count, sum, avg, min, max, distinct)
+
+        Returns:
+            Aggregation results
+        """
+        values = []
+        for event in self.events:
+            value = self._get_nested_value(event, field_path)
+            if value is not None:
+                values.append(value)
+
+        if operation == 'count':
+            return {'count': len(self.events)}
+        elif operation == 'distinct':
+            return {'distinct_values': list(set(values)), 'distinct_count': len(set(values))}
+        elif not values:
+            return {'error': 'No values found'}
+
+        try:
+            if operation == 'sum':
+                return {'sum': sum(values)}
+            elif operation == 'avg':
+                return {'avg': sum(values) / len(values)}
+            elif operation == 'min':
+                return {'min': min(values)}
+            elif operation == 'max':
+                return {'max': max(values)}
+        except (TypeError, ValueError):
+            return {'error': f'Cannot perform {operation} on non-numeric values'}
+
+        return {'error': f'Unknown operation: {operation}'}
+
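group_by and aggregate pair naturally for per-group rollups. Continuing the sketch (events lacking the field land under the 'null' key):

by_db = analyzer.group_by('data.database_name')    # {'Sales': [...], 'null': [...]}
sales = XELAnalyzer(by_db['Sales'])
print(sales.aggregate('data.duration', operation='avg'))   # {'avg': 2500000.0}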
+    def count_by(self, field_path: str) -> Dict[str, int]:
+        """
+        Count events by field value.
+
+        Args:
+            field_path: Field path to count by
+
+        Returns:
+            Dictionary mapping values to counts, sorted by count descending
+        """
+        counts = defaultdict(int)
+        for event in self.events:
+            value = self._get_nested_value(event, field_path)
+            key = str(value) if value is not None else 'null'
+            counts[key] += 1
+
+        return dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))
+
+    def top_n(self, field_path: str, n: int = 10) -> List[tuple]:
+        """
+        Get top N values by frequency.
+
+        Args:
+            field_path: Field path to analyze
+            n: Number of top items to return
+
+        Returns:
+            List of (value, count) tuples
+        """
+        counts = self.count_by(field_path)
+        return sorted(counts.items(), key=lambda x: x[1], reverse=True)[:n]
+
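count_by and top_n answer the usual "which events are noisiest" question; with the two sample events above:

print(analyzer.count_by('name'))
# {'sql_batch_completed': 1, 'error_reported': 1}
print(analyzer.top_n('name', n=1))
# [('sql_batch_completed', 1)]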
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        Get statistical overview of events.
+
+        Returns:
+            Statistics dictionary
+        """
+        if not self.events:
+            return {'total_events': 0}
+
+        # Count by event type
+        event_types = self.count_by('name')
+
+        # Get time range
+        timestamps = [e.get('timestamp', '') for e in self.events if e.get('timestamp')]
+        time_range = None
+        if timestamps:
+            try:
+                timestamps.sort()
+                time_range = {'start': timestamps[0], 'end': timestamps[-1]}
+            except Exception:
+                pass
+
+        # Get all fields
+        all_fields = set()
+        for event in self.events:
+            all_fields.update(event.keys())
+            if 'data' in event:
+                all_fields.update(f"data.{k}" for k in event['data'].keys())
+            if 'actions' in event:
+                all_fields.update(f"actions.{k}" for k in event['actions'].keys())
+
+        return {
+            'total_events': len(self.events),
+            'event_types': event_types,
+            'time_range': time_range,
+            'unique_fields': sorted(all_fields)
+        }
+
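get_stats is a sensible first call on freshly parsed events; for the sketch above it reports:

stats = analyzer.get_stats()
print(stats['total_events'])   # 2
print(stats['time_range'])     # {'start': '2024-01-01 12:00:00', 'end': '2024-01-01 12:05:00'}
print(stats['unique_fields'])  # ['actions', 'actions.session_id', 'data', ...]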
+    def custom_filter(self, predicate: Callable[[Dict[str, Any]], bool]) -> 'XELAnalyzer':
+        """
+        Filter events using custom predicate function.
+
+        Args:
+            predicate: Function that takes an event and returns True/False
+
+        Returns:
+            New XELAnalyzer with filtered events
+        """
+        filtered = [e for e in self.events if predicate(e)]
+        return XELAnalyzer(filtered)
+
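custom_filter covers predicates the built-in operators cannot express; the severity threshold here is just an example:

severe = analyzer.custom_filter(
    lambda e: e.get('data', {}).get('severity', 0) >= 16)
print(len(severe.get_events()))   # 1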
+    def get_events(self) -> List[Dict[str, Any]]:
+        """
+        Get the current list of events.
+
+        Returns:
+            List of events
+        """
+        return self.events
+
+    # Helper methods
+
+    def _get_nested_value(self, d: Dict[str, Any], path: str) -> Any:
+        """Get value from nested dictionary using dot notation."""
+        keys = path.split('.')
+        value = d
+        for key in keys:
+            if isinstance(value, dict):
+                value = value.get(key)
+            else:
+                return None
+        return value
+
+    def _search_in_dict(self, d: Dict[str, Any], query: Any,
+                        fields: Optional[List[str]] = None,
+                        prefix: str = '') -> bool:
+        """Recursively search dictionary for query."""
+        for key, value in d.items():
+            current_path = f"{prefix}.{key}" if prefix else key
+
+            # Check if we should search this field
+            if fields and current_path not in fields:
+                if isinstance(value, dict):
+                    if self._search_in_dict(value, query, fields, current_path):
+                        return True
+                continue
+
+            # Search in value
+            if isinstance(value, dict):
+                if self._search_in_dict(value, query, fields, current_path):
+                    return True
+            elif value is not None:
+                value_str = str(value)
+                if isinstance(query, re.Pattern):
+                    if query.search(value_str):
+                        return True
+                else:
+                    if str(query).lower() in value_str.lower():
+                        return True
+
+        return False
+
+    def _parse_timestamp(self, timestamp: str) -> datetime:
+        """Parse timestamp string to datetime."""
+        # Try common formats
+        formats = [
+            '%Y-%m-%dT%H:%M:%S.%fZ',
+            '%Y-%m-%dT%H:%M:%SZ',
+            '%Y-%m-%d %H:%M:%S.%f',
+            '%Y-%m-%d %H:%M:%S',
+            '%Y-%m-%d',
+        ]
+
+        for fmt in formats:
+            try:
+                return datetime.strptime(timestamp, fmt)
+            except ValueError:
+                continue
+
+        # If all fail, try generic parsing (requires the third-party
+        # python-dateutil package at runtime)
+        from dateutil import parser
+        return parser.parse(timestamp)