sql_xel_parser-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
"""
XEL Parser - Parse and analyze SQL Server Extended Events files.

A Python package for parsing and analyzing SQL Server Extended Events (.xel) files
without requiring SQL Server.
"""

# Public API re-exports: high-level parser/converter/analyzer classes plus
# the low-level helpers from real_parser.
from .parser import XELParser
from .converter import XELConverter
from .analyzer import XELAnalyzer
from .real_parser import parse_xel_file, extract_real_data_from_xel

# NOTE(review): keep __version__ in sync with the distribution metadata
# (wheel says 1.0.0).
__version__ = '1.0.0'
__all__ = ['XELParser', 'XELConverter', 'XELAnalyzer', 'parse_xel_file', 'extract_real_data_from_xel']
@@ -0,0 +1,6 @@
1
"""Allow package to be run as python -m xel_parser"""

from .cli import main

# Delegate straight to the CLI entry point when executed as a module.
if __name__ == '__main__':
    main()
@@ -0,0 +1,380 @@
1
+ """
2
+ XEL Analyzer - Advanced parsing, filtering, and analysis of XEL events.
3
+
4
+ Provides utilities for:
5
+ - Filtering events by various criteria
6
+ - Searching event content
7
+ - Aggregating and grouping events
8
+ - Statistical analysis
9
+ """
10
+
11
+ import re
12
+ from typing import List, Dict, Any, Callable, Optional
13
+ from datetime import datetime
14
+ from collections import defaultdict
15
+
16
+
17
class XELAnalyzer:
    """Analyzer for XEL events with filtering and aggregation capabilities.

    Every ``filter_*`` / ``search`` method returns a *new* XELAnalyzer
    wrapping the matching subset, so calls can be chained fluently.
    Events are plain dictionaries; nested fields are addressed with dot
    notation (e.g. ``'data.duration'``).
    """

    def __init__(self, events: List[Dict[str, Any]]):
        """
        Initialize analyzer with events.

        Args:
            events: List of parsed event dictionaries
        """
        self.events = events

    def filter_by_name(self, name_pattern: str) -> 'XELAnalyzer':
        """
        Filter events by name pattern.

        Args:
            name_pattern: Regex pattern (case-insensitive) or exact name

        Returns:
            New XELAnalyzer with filtered events
        """
        try:
            pattern = re.compile(name_pattern, re.IGNORECASE)
            filtered = [e for e in self.events if pattern.search(e.get('name', ''))]
        except re.error:
            # Invalid regex: fall back to an exact (case-sensitive) name match.
            filtered = [e for e in self.events if e.get('name', '') == name_pattern]

        return XELAnalyzer(filtered)

    def filter_by_time_range(self, start_time: Optional[str] = None,
                             end_time: Optional[str] = None) -> 'XELAnalyzer':
        """
        Filter events by time range (inclusive on both ends).

        Args:
            start_time: Start timestamp (ISO format or parseable string)
            end_time: End timestamp (ISO format or parseable string)

        Returns:
            New XELAnalyzer with filtered events

        Notes:
            Best-effort semantics: events without a parseable timestamp are
            dropped, and an unparseable bound is ignored (treated as
            unbounded).
        """
        # Parse the bounds once, outside the per-event loop (they are loop
        # invariants; the original re-parsed them for every event).
        start = None
        end = None
        if start_time:
            try:
                start = self._parse_timestamp(start_time)
            except ValueError:
                pass  # unparseable bound -> no lower limit
        if end_time:
            try:
                end = self._parse_timestamp(end_time)
            except ValueError:
                pass  # unparseable bound -> no upper limit

        filtered = []
        for event in self.events:
            timestamp = event.get('timestamp', '')
            if not timestamp:
                continue

            try:
                event_time = self._parse_timestamp(timestamp)
            except ValueError:
                continue  # skip events whose timestamp cannot be parsed

            try:
                if start is not None and event_time < start:
                    continue
                if end is not None and event_time > end:
                    continue
            except TypeError:
                # Naive/aware datetimes cannot be compared; keep the event
                # (mirrors the original permissive behaviour).
                pass

            filtered.append(event)

        return XELAnalyzer(filtered)

    def filter_by_field(self, field_path: str, value: Any = None,
                        operator: str = 'equals') -> 'XELAnalyzer':
        """
        Filter events by field value.

        Args:
            field_path: Field path (e.g., 'data.duration', 'actions.session_id')
            value: Value to compare (None means field exists)
            operator: Comparison operator (equals, contains, gt, lt, gte, lte, exists)

        Returns:
            New XELAnalyzer with filtered events. An unknown operator
            matches nothing (empty result), as in the original behaviour.
        """
        comparisons = {
            'gt': lambda a, b: a > b,
            'lt': lambda a, b: a < b,
            'gte': lambda a, b: a >= b,
            'lte': lambda a, b: a <= b,
        }

        def matches(field_value: Any) -> bool:
            """Return True when field_value satisfies operator/value."""
            if operator == 'exists':
                return field_value is not None
            if operator == 'equals':
                return field_value == value
            if operator == 'contains':
                # Case-insensitive substring test on the string forms.
                return bool(value) and bool(field_value) and \
                    str(value).lower() in str(field_value).lower()
            compare = comparisons.get(operator)
            if compare is None or field_value is None:
                return False  # unknown operator or missing field
            try:
                return compare(field_value, value)
            except TypeError:
                return False  # incomparable types -> no match

        filtered = [e for e in self.events
                    if matches(self._get_nested_value(e, field_path))]
        return XELAnalyzer(filtered)

    def search(self, query: str, fields: Optional[List[str]] = None) -> 'XELAnalyzer':
        """
        Search for events containing query string.

        Args:
            query: Search query (regex supported)
            fields: Specific fields to search (None = search all)

        Returns:
            New XELAnalyzer with matching events
        """
        try:
            pattern = re.compile(query, re.IGNORECASE)
        except re.error:
            # Invalid regex: search for the literal string instead.
            pattern = None

        filtered = [e for e in self.events
                    if self._search_in_dict(e, pattern or query, fields)]
        return XELAnalyzer(filtered)

    def group_by(self, field_path: str) -> Dict[str, List[Dict[str, Any]]]:
        """
        Group events by field value.

        Args:
            field_path: Field path to group by

        Returns:
            Dictionary mapping field values (stringified; missing -> 'null')
            to event lists
        """
        groups = defaultdict(list)
        for event in self.events:
            value = self._get_nested_value(event, field_path)
            key = str(value) if value is not None else 'null'
            groups[key].append(event)

        return dict(groups)

    def aggregate(self, field_path: str, operation: str = 'count') -> Dict[str, Any]:
        """
        Aggregate events by field.

        Args:
            field_path: Field path to aggregate
            operation: Aggregation operation (count, sum, avg, min, max, distinct)

        Returns:
            Aggregation results, or an {'error': ...} dict on failure
        """
        values = []
        for event in self.events:
            value = self._get_nested_value(event, field_path)
            if value is not None:
                values.append(value)

        if operation == 'count':
            # Counts all events, not just those with the field present.
            return {'count': len(self.events)}
        if operation == 'distinct':
            distinct = set(values)  # build once (was computed twice)
            return {'distinct_values': list(distinct), 'distinct_count': len(distinct)}
        if not values:
            return {'error': 'No values found'}

        try:
            if operation == 'sum':
                return {'sum': sum(values)}
            if operation == 'avg':
                return {'avg': sum(values) / len(values)}
            if operation == 'min':
                return {'min': min(values)}
            if operation == 'max':
                return {'max': max(values)}
        except (TypeError, ValueError):
            return {'error': f'Cannot perform {operation} on non-numeric values'}

        return {'error': f'Unknown operation: {operation}'}

    def count_by(self, field_path: str) -> Dict[str, int]:
        """
        Count events by field value.

        Args:
            field_path: Field path to count by

        Returns:
            Dictionary mapping values to counts, sorted by count descending
        """
        counts = defaultdict(int)
        for event in self.events:
            value = self._get_nested_value(event, field_path)
            key = str(value) if value is not None else 'null'
            counts[key] += 1

        return dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))

    def top_n(self, field_path: str, n: int = 10) -> List[tuple]:
        """
        Get top N values by frequency.

        Args:
            field_path: Field path to analyze
            n: Number of top items to return

        Returns:
            List of (value, count) tuples, most frequent first
        """
        # count_by already returns entries sorted by count descending,
        # so a slice suffices (the original re-sorted redundantly).
        return list(self.count_by(field_path).items())[:n]

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistical overview of events.

        Returns:
            Statistics dictionary with total_events, event_types,
            time_range (or None) and unique_fields
        """
        if not self.events:
            return {'total_events': 0}

        # Count by event type.
        event_types = self.count_by('name')

        # Determine the time range from the raw timestamp strings.
        timestamps = [e.get('timestamp', '') for e in self.events if e.get('timestamp')]
        time_range = None
        if timestamps:
            try:
                timestamps.sort()
                time_range = {'start': timestamps[0], 'end': timestamps[-1]}
            except TypeError:
                pass  # mixed timestamp types cannot be ordered

        # Collect all top-level and nested (data.*/actions.*) field names.
        all_fields = set()
        for event in self.events:
            all_fields.update(event.keys())
            for section in ('data', 'actions'):
                nested = event.get(section)
                # Guard: a non-dict payload would otherwise crash .keys().
                if isinstance(nested, dict):
                    all_fields.update(f"{section}.{k}" for k in nested)

        return {
            'total_events': len(self.events),
            'event_types': event_types,
            'time_range': time_range,
            'unique_fields': sorted(all_fields)
        }

    def custom_filter(self, predicate: Callable[[Dict[str, Any]], bool]) -> 'XELAnalyzer':
        """
        Filter events using custom predicate function.

        Args:
            predicate: Function that takes an event and returns True/False

        Returns:
            New XELAnalyzer with filtered events
        """
        return XELAnalyzer([e for e in self.events if predicate(e)])

    def get_events(self) -> List[Dict[str, Any]]:
        """
        Get the current list of events.

        Returns:
            List of events
        """
        return self.events

    # Helper methods

    def _get_nested_value(self, d: Dict[str, Any], path: str) -> Any:
        """Get value from nested dictionary using dot notation.

        Returns None if any intermediate key is missing or not a dict.
        """
        value = d
        for key in path.split('.'):
            if not isinstance(value, dict):
                return None
            value = value.get(key)
        return value

    def _search_in_dict(self, d: Dict[str, Any], query: Any,
                        fields: Optional[List[str]] = None,
                        prefix: str = '') -> bool:
        """Recursively search dictionary for query.

        Args:
            d: Dictionary to search
            query: Compiled regex pattern or literal string
            fields: Dotted field paths to restrict the search to (None = all)
            prefix: Dotted path accumulated during recursion

        Returns:
            True as soon as any (allowed) value matches.
        """
        for key, value in d.items():
            current_path = f"{prefix}.{key}" if prefix else key

            # Field restriction: skip non-listed leaves but still descend
            # into dicts, since a listed path may lie deeper.
            if fields and current_path not in fields:
                if isinstance(value, dict):
                    if self._search_in_dict(value, query, fields, current_path):
                        return True
                continue

            if isinstance(value, dict):
                if self._search_in_dict(value, query, fields, current_path):
                    return True
            elif value is not None:
                value_str = str(value)
                if isinstance(query, re.Pattern):
                    if query.search(value_str):
                        return True
                elif str(query).lower() in value_str.lower():
                    return True

        return False

    def _parse_timestamp(self, timestamp: str) -> datetime:
        """Parse timestamp string to datetime.

        Tries a list of common explicit formats first, then stdlib ISO-8601
        parsing, then python-dateutil if it happens to be installed (the
        original required dateutil unconditionally on fallback, crashing
        with ImportError when absent).

        Raises:
            ValueError: If the string cannot be parsed by any strategy.
        """
        formats = [
            '%Y-%m-%dT%H:%M:%S.%fZ',
            '%Y-%m-%dT%H:%M:%SZ',
            '%Y-%m-%d %H:%M:%S.%f',
            '%Y-%m-%d %H:%M:%S',
            '%Y-%m-%d',
        ]
        for fmt in formats:
            try:
                return datetime.strptime(timestamp, fmt)
            except ValueError:
                continue

        # Stdlib ISO-8601 fallback; normalise a trailing 'Z' for Python
        # versions where fromisoformat does not accept it.
        try:
            return datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except ValueError:
            pass

        # Last resort: python-dateutil, if available (optional dependency).
        try:
            from dateutil import parser
            return parser.parse(timestamp)
        except (ImportError, ValueError, OverflowError) as exc:
            raise ValueError(f'Unparseable timestamp: {timestamp!r}') from exc