sql-xel-parser 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_xel_parser/cli.py ADDED
@@ -0,0 +1,315 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ XEL Parser CLI - Command-line interface for XEL file parsing and analysis.
4
+ """
5
+
6
+ import argparse
7
+ import sys
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Optional, List
11
+
12
+ from .parser import XELParser
13
+ from .converter import XELConverter
14
+ from .analyzer import XELAnalyzer
15
+
16
+
17
def parse_args(argv=None):
    """Parse command-line arguments for the xel-parser CLI.

    Args:
        argv: Optional argument list (defaults to sys.argv[1:]); passing a
            list makes the parser testable without patching sys.argv.

    Returns:
        argparse.Namespace with all option values.
    """
    parser = argparse.ArgumentParser(
        prog='xel-parser',
        description='Parse and analyze SQL Server Extended Events (.xel) files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert XEL to JSON
  xel-parser input.xel -o output.json -f json

  # Convert to readable text
  xel-parser input.xel -o output.txt -f text

  # Process entire directory (recursively)
  xel-parser data/sql-ptfm-prod-westus3 -r -o merged_output.json

  # Process folder and get summary
  xel-parser data/sql-ptfm-prod-westus3 -r -f summary

  # Filter events by name
  xel-parser input.xel -o output.json --filter-name "sql_batch_completed"

  # Search for specific content
  xel-parser input.xel -o results.json --search "SELECT.*FROM"

  # Get summary statistics
  xel-parser input.xel -f summary

  # Count events by type
  xel-parser input.xel --count-by name

  # Get top 10 queries by duration
  # (--top-n takes a single FIELD:N argument)
  xel-parser input.xel --filter-field "data.duration" --top-n "data.duration:10"
        """
    )

    parser.add_argument('input', help='Input XEL file or directory path')
    parser.add_argument('-o', '--output', help='Output file path (default: stdout)')
    parser.add_argument('-f', '--format',
                        choices=['json', 'jsonl', 'csv', 'text', 'markdown', 'md', 'summary'],
                        default='json',
                        help='Output format (default: json)')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='Recursively search for XEL files in directory')
    parser.add_argument('--merge', action='store_true', default=True,
                        help='Merge all events from multiple files (default: True)')
    parser.add_argument('--separate', dest='merge', action='store_false',
                        help='Process each file separately (outputs to multiple files)')

    # Filtering options
    filter_group = parser.add_argument_group('filtering options')
    filter_group.add_argument('--filter-name', metavar='PATTERN',
                              help='Filter events by name (regex supported)')
    filter_group.add_argument('--filter-field', metavar='FIELD=VALUE',
                              action='append',
                              help='Filter by field value (e.g., data.duration>1000)')
    filter_group.add_argument('--filter-time-start', metavar='TIMESTAMP',
                              help='Filter events after this timestamp')
    filter_group.add_argument('--filter-time-end', metavar='TIMESTAMP',
                              help='Filter events before this timestamp')
    filter_group.add_argument('--search', metavar='QUERY',
                              help='Search events for query string (regex supported)')

    # Analysis options
    analysis_group = parser.add_argument_group('analysis options')
    analysis_group.add_argument('--stats', action='store_true',
                                help='Show statistics about events')
    analysis_group.add_argument('--count-by', metavar='FIELD',
                                help='Count events by field value')
    analysis_group.add_argument('--group-by', metavar='FIELD',
                                help='Group events by field value')
    analysis_group.add_argument('--top-n', metavar='FIELD:N',
                                help='Show top N values by frequency (e.g., name:10)')
    analysis_group.add_argument('--aggregate', metavar='FIELD:OP',
                                help='Aggregate field (ops: count,sum,avg,min,max,distinct)')

    # Output options
    parser.add_argument('--verbose', action='store_true',
                        help='Verbose output (for text format)')
    parser.add_argument('--indent', type=int, default=2,
                        help='JSON indentation level (default: 2)')
    parser.add_argument('--limit', type=int,
                        help='Limit number of events to process')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0.0')

    return parser.parse_args(argv)
104
+
105
+
106
def find_xel_files(path: Path, recursive: bool = False) -> List[Path]:
    """Collect XEL files under *path*, ordered by modification time (oldest first).

    A file path is returned unchanged as a single-element list; a directory
    is globbed for ``*.xel``, descending into subdirectories when
    *recursive* is True.
    """
    if path.is_file():
        return [path]

    globber = path.rglob if recursive else path.glob
    matches = list(globber('*.xel'))
    matches.sort(key=lambda candidate: candidate.stat().st_mtime)
    return matches
118
+
119
+
120
def parse_xel_files(file_paths: List[Path], limit: Optional[int] = None):
    """Parse every file in *file_paths* and return the merged event list.

    Progress and per-file errors are reported on stderr; a file that fails
    to parse is skipped rather than aborting the run.  When *limit* is set,
    parsing stops once that many events have been collected and the result
    is truncated to exactly *limit* events.
    """
    merged = []
    file_count = len(file_paths)

    for index, current in enumerate(file_paths, start=1):
        try:
            print(f"Parsing file {index}/{file_count}: {current}", file=sys.stderr)
            events = list(XELParser(str(current)).parse())
            print(f"  Found {len(events)} events", file=sys.stderr)
            merged.extend(events)

            if limit and len(merged) >= limit:
                print(f"Reached event limit of {limit}, stopping", file=sys.stderr)
                merged = merged[:limit]
                break
        except Exception as exc:
            print(f"  Error parsing {current}: {exc}", file=sys.stderr)
            continue

    return merged
143
+
144
+
145
def apply_filters(analyzer: "XELAnalyzer", args) -> "XELAnalyzer":
    """Chain the name/time/field/search filters requested on the command line.

    Each applied filter reports the surviving event count on stderr.
    Returns the (possibly replaced) analyzer instance.
    """
    def report(stage):
        # Reads the *current* analyzer binding, so counts reflect the
        # filter that just ran.
        print(f"After {stage}: {len(analyzer.events)} events", file=sys.stderr)

    if args.filter_name:
        analyzer = analyzer.filter_by_name(args.filter_name)
        report("name filter")

    if args.filter_time_start or args.filter_time_end:
        analyzer = analyzer.filter_by_time_range(args.filter_time_start, args.filter_time_end)
        report("time filter")

    if args.filter_field:
        for spec in args.filter_field:
            analyzer = apply_field_filter(analyzer, spec)
            report(f"field filter '{spec}'")

    if args.search:
        analyzer = analyzer.search(args.search)
        report(f"search '{args.search}'")

    return analyzer
165
+
166
+
167
def apply_field_filter(analyzer: "XELAnalyzer", filter_str: str) -> "XELAnalyzer":
    """Parse a filter expression like ``data.duration>1000`` and apply it.

    Supported operators: ``>=`` (gte), ``<=`` (lte), ``>`` (gt), ``<`` (lt),
    ``=`` (equals), ``~`` (contains).  The operator occurring *earliest* in
    the string is used, with two-character operators preferred over their
    one-character prefixes at the same position.  Numeric values are coerced
    to int, then float, falling back to the raw string.  An expression with
    no operator is treated as a field-existence test.

    Args:
        analyzer: Analyzer whose ``filter_by_field`` is invoked.
        filter_str: The raw FIELD<OP>VALUE expression from the CLI.

    Returns:
        The analyzer produced by ``filter_by_field``.
    """
    operators = {
        '>=': 'gte',
        '<=': 'lte',
        '>': 'gt',
        '<': 'lt',
        '=': 'equals',
        '~': 'contains',
    }

    # Pick the operator that appears first in the expression so operator
    # characters inside the VALUE part (e.g. "query~a=b") are not mistaken
    # for the filter operator.  Longer tokens win position ties so that
    # "a>=b" splits on ">=" rather than ">".
    best = None  # (position, -len(op_str), op_str, op_name)
    for op_str, op_name in operators.items():
        pos = filter_str.find(op_str)
        if pos != -1:
            candidate = (pos, -len(op_str), op_str, op_name)
            if best is None or candidate < best:
                best = candidate

    if best is None:
        # No operator present: existence check on the whole expression.
        return analyzer.filter_by_field(filter_str.strip(), operator='exists')

    _, _, op_str, op_name = best
    field, value = filter_str.split(op_str, 1)
    field = field.strip()
    value = value.strip()

    # Coerce numeric comparisons: int first, then float, else keep string.
    try:
        value = int(value)
    except ValueError:
        try:
            value = float(value)
        except ValueError:
            pass

    return analyzer.filter_by_field(field, value, op_name)
195
+
196
+
197
def perform_analysis(analyzer: "XELAnalyzer", args):
    """Run the requested analysis option (if any) and print it as JSON.

    Exactly one option is honoured, in priority order: --stats, --count-by,
    --group-by, --top-n, --aggregate.

    Returns:
        True when an analysis was produced (caller should stop),
        False when no analysis option was requested.
    """
    def emit(payload):
        print(json.dumps(payload, indent=args.indent))

    if args.stats:
        emit(analyzer.get_stats())
        return True

    if args.count_by:
        emit(analyzer.count_by(args.count_by))
        return True

    if args.group_by:
        grouped = analyzer.group_by(args.group_by)
        emit({key: len(members) for key, members in grouped.items()})
        return True

    if args.top_n:
        pieces = args.top_n.split(':')
        count = int(pieces[1]) if len(pieces) > 1 else 10
        emit(dict(analyzer.top_n(pieces[0], count)))
        return True

    if args.aggregate:
        pieces = args.aggregate.split(':')
        operation = pieces[1] if len(pieces) > 1 else 'count'
        emit(analyzer.aggregate(pieces[0], operation))
        return True

    return False
232
+
233
+
234
def main():
    """CLI entry point.

    Parses arguments, loads events from a file or directory, applies any
    requested filters and analysis, converts to the chosen format and
    writes the result to --output (or stdout).

    NOTE(review): the --merge/--separate flag is parsed but not consulted
    here; directory input is always merged — confirm intended.

    Returns:
        Process exit code: 0 on success (including "no events"), 1 on error.
    """
    args = parse_args()

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input path not found: {args.input}", file=sys.stderr)
        return 1

    try:
        if source.is_dir():
            print(f"Searching for XEL files in {args.input}...", file=sys.stderr)
            xel_files = find_xel_files(source, recursive=args.recursive)
            if not xel_files:
                print("Error: No XEL files found in directory", file=sys.stderr)
                return 1
            print(f"Found {len(xel_files)} XEL files", file=sys.stderr)

            events = parse_xel_files(xel_files, limit=args.limit)
            print(f"\nTotal events from all files: {len(events)}", file=sys.stderr)
        else:
            print(f"Parsing {args.input}...", file=sys.stderr)
            events = list(XELParser(str(source)).parse())
            print(f"Parsed {len(events)} events", file=sys.stderr)

            if args.limit:
                events = events[:args.limit]
                print(f"Limited to {len(events)} events", file=sys.stderr)

        if not events:
            print("Warning: No events found", file=sys.stderr)
            return 0

        analyzer = XELAnalyzer(events)

        wants_filtering = any([args.filter_name, args.filter_field,
                               args.filter_time_start, args.filter_time_end,
                               args.search])
        if wants_filtering:
            analyzer = apply_filters(analyzer, args)
            events = analyzer.get_events()

        # Analysis options short-circuit format conversion entirely.
        if perform_analysis(analyzer, args):
            return 0

        converter = XELConverter()
        renderers = {
            'json': lambda: converter.to_json(events, indent=args.indent),
            'jsonl': lambda: converter.to_json_lines(events),
            'csv': lambda: converter.to_csv(events),
            'text': lambda: converter.to_text(events, verbose=args.verbose),
            'markdown': lambda: converter.to_markdown(events),
            'md': lambda: converter.to_markdown(events),
            'summary': lambda: converter.to_summary(events),
        }
        renderer = renderers.get(args.format)
        if renderer is None:
            # Unreachable via argparse 'choices'; kept as a safety net.
            print(f"Error: Unsupported format: {args.format}", file=sys.stderr)
            return 1
        output = renderer()

        if args.output:
            destination = Path(args.output)
            destination.parent.mkdir(parents=True, exist_ok=True)
            with open(destination, 'w', encoding='utf-8') as handle:
                handle.write(output)
            print(f"Output written to {args.output}", file=sys.stderr)
        else:
            print(output)

        return 0

    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return 1


if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1,284 @@
1
+ """
2
+ XEL Converter - Convert parsed XEL events to various human-readable formats.
3
+
4
+ Supported formats:
5
+ - JSON
6
+ - CSV
7
+ - Pretty text
8
+ - Markdown
9
+ """
10
+
11
+ import json
12
+ import csv
13
+ from typing import List, Dict, Any, TextIO
14
+ from datetime import datetime
15
+
16
+
17
class XELConverter:
    """Converter for XEL events to various formats.

    All converters are static methods that take a list of event
    dictionaries (keys observed in this module: 'name', 'timestamp',
    'data', 'actions', 'content') and return the rendered output as a
    string.  The class is a stateless namespace.
    """

    @staticmethod
    def to_json(events: List[Dict[str, Any]], indent: int = 2) -> str:
        """
        Convert events to JSON format.

        Args:
            events: List of event dictionaries
            indent: JSON indentation level

        Returns:
            JSON string; non-serializable values are stringified via str()
        """
        return json.dumps(events, indent=indent, default=str)

    @staticmethod
    def to_json_lines(events: List[Dict[str, Any]]) -> str:
        """
        Convert events to JSON Lines format (one JSON object per line).

        Args:
            events: List of event dictionaries

        Returns:
            JSON Lines string
        """
        return '\n'.join(json.dumps(event, default=str) for event in events)

    @staticmethod
    def to_csv(events: List[Dict[str, Any]]) -> str:
        """
        Convert events to CSV format.

        Nested dictionaries are flattened with '_'-joined keys; the header
        is the sorted union of all fields across events, so rows missing a
        field get empty cells.

        Args:
            events: List of event dictionaries

        Returns:
            CSV string, or "" when there are no events
        """
        if not events:
            return ""

        # Flatten events and collect the union of all fields.
        flattened_events = []
        all_fields = set()
        for event in events:
            flat_event = XELConverter._flatten_dict(event)
            flattened_events.append(flat_event)
            all_fields.update(flat_event.keys())

        # Sort fields for deterministic column order.
        fieldnames = sorted(all_fields)

        import io
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(flattened_events)

        return output.getvalue()

    @staticmethod
    def to_text(events: List[Dict[str, Any]], verbose: bool = True) -> str:
        """
        Convert events to pretty text format.

        Args:
            events: List of event dictionaries
            verbose: Include data/actions details for each event

        Returns:
            Formatted text string (report header plus one section per event)
        """
        lines = []
        lines.append("=" * 80)
        lines.append(f"XEL Events Report - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append(f"Total Events: {len(events)}")
        lines.append("=" * 80)
        lines.append("")

        for i, event in enumerate(events, 1):
            lines.append(f"Event #{i}: {event.get('name', 'Unknown')}")
            lines.append("-" * 80)

            if event.get('timestamp'):
                lines.append(f"  Timestamp: {event['timestamp']}")

            if verbose and event.get('data'):
                lines.append("  Data:")
                for key, value in event['data'].items():
                    lines.append(f"    {key}: {value}")

            if verbose and event.get('actions'):
                lines.append("  Actions:")
                for key, value in event['actions'].items():
                    lines.append(f"    {key}: {value}")

            if event.get('content'):
                lines.append(f"  Content: {event['content']}")

            lines.append("")

        return '\n'.join(lines)

    @staticmethod
    def to_markdown(events: List[Dict[str, Any]]) -> str:
        """
        Convert events to Markdown format.

        Args:
            events: List of event dictionaries

        Returns:
            Markdown string (one '## Event N' section per event)
        """
        lines = []
        lines.append("# XEL Events Report")
        lines.append("")
        lines.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append(f"**Total Events:** {len(events)}")
        lines.append("")

        for i, event in enumerate(events, 1):
            lines.append(f"## Event {i}: {event.get('name', 'Unknown')}")
            lines.append("")

            if event.get('timestamp'):
                lines.append(f"**Timestamp:** `{event['timestamp']}`")
                lines.append("")

            if event.get('data'):
                lines.append("### Data")
                lines.append("")
                for key, value in event['data'].items():
                    lines.append(f"- **{key}:** `{value}`")
                lines.append("")

            if event.get('actions'):
                lines.append("### Actions")
                lines.append("")
                for key, value in event['actions'].items():
                    lines.append(f"- **{key}:** `{value}`")
                lines.append("")

            if event.get('content'):
                lines.append("### Content")
                lines.append("")
                lines.append("```")
                lines.append(str(event['content']))
                lines.append("```")
                lines.append("")

            lines.append("---")
            lines.append("")

        return '\n'.join(lines)

    @staticmethod
    def to_summary(events: List[Dict[str, Any]]) -> str:
        """
        Generate a summary of events.

        Args:
            events: List of event dictionaries

        Returns:
            Summary string: totals, time range, per-type counts
            (most frequent first)
        """
        if not events:
            return "No events found."

        # Count events by name.
        event_counts: Dict[str, int] = {}
        for event in events:
            name = event.get('name', 'Unknown')
            event_counts[name] = event_counts.get(name, 0) + 1

        # Derive the observed time range from sortable timestamps.
        timestamps = [e.get('timestamp', '') for e in events if e.get('timestamp')]
        time_range = "Unknown"
        if timestamps:
            try:
                timestamps.sort()
                time_range = f"{timestamps[0]} to {timestamps[-1]}"
            except Exception:
                # Mixed/unorderable timestamp types: fall back to "Unknown"
                # rather than aborting the whole summary.  (Was a bare
                # except, which also swallowed SystemExit/KeyboardInterrupt.)
                pass

        lines = []
        lines.append("XEL Events Summary")
        lines.append("=" * 80)
        lines.append(f"Total Events: {len(events)}")
        lines.append(f"Time Range: {time_range}")
        lines.append("")
        lines.append("Event Types:")
        for name, count in sorted(event_counts.items(), key=lambda x: x[1], reverse=True):
            lines.append(f"  {name}: {count}")

        return '\n'.join(lines)

    @staticmethod
    def _flatten_dict(d: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]:
        """
        Flatten a nested dictionary.

        Args:
            d: Dictionary to flatten
            parent_key: Parent key prefix
            sep: Separator for nested keys

        Returns:
            Flattened dictionary (e.g. {'a': {'b': 1}} -> {'a_b': 1})
        """
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                # Recurse into nested dicts, accumulating the key path.
                items.extend(XELConverter._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)
244
+
245
+
246
def convert_file(input_path: str, output_path: str, format: str, **kwargs):
    """
    Convenience function to convert an XEL file to a specific format.

    Args:
        input_path: Path to input XEL file
        output_path: Path to output file (overwritten, UTF-8)
        format: Output format (json, jsonl, csv, text, markdown, md, summary)
        **kwargs: Format options: 'indent' (json), 'verbose' (text)

    Returns:
        Number of events parsed.

    Raises:
        ValueError: If *format* is not supported.
    """
    # Intra-package relative import, matching cli.py.  The previous
    # absolute 'from xel_parser import XELParser' raised ImportError
    # because the installed package is sql_xel_parser.
    from .parser import XELParser

    # Parse events
    parser = XELParser(input_path)
    events = list(parser.parse())

    # Convert to specified format
    converter = XELConverter()

    if format == 'json':
        output = converter.to_json(events, indent=kwargs.get('indent', 2))
    elif format == 'jsonl':
        output = converter.to_json_lines(events)
    elif format == 'csv':
        output = converter.to_csv(events)
    elif format == 'text':
        output = converter.to_text(events, verbose=kwargs.get('verbose', True))
    elif format in ('markdown', 'md'):
        output = converter.to_markdown(events)
    elif format == 'summary':
        output = converter.to_summary(events)
    else:
        raise ValueError(f"Unsupported format: {format}")

    # Write output
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(output)

    return len(events)
+ return len(events)