tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tql/cli.py ADDED
@@ -0,0 +1,484 @@
1
+ """Command-line interface for TQL.
2
+
3
+ This module provides the CLI entry point for executing TQL queries against files
4
+ and folders with streaming support and smart output formatting.
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import os
10
+ import sys
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from .core import TQL
14
+ from .exceptions import TQLError
15
+
16
+
17
def detect_output_format(output_path: Optional[str], explicit_format: Optional[str]) -> str:
    """Determine which output format should be used.

    An explicitly requested format always wins. Otherwise, console output
    defaults to a table, and file output is inferred from the extension.

    Args:
        output_path: Path to output file (None for stdout)
        explicit_format: Explicitly specified format

    Returns:
        Detected format ('json', 'jsonl', or 'table')
    """
    if explicit_format:
        return explicit_format

    if output_path is None:
        # No destination file: render for the console.
        return "table"

    extension = os.path.splitext(output_path)[1].lower()
    format_by_extension = {
        ".json": "json",
        ".jsonl": "jsonl",
        ".ndjson": "jsonl",
    }
    # Unknown extensions fall back to the table renderer.
    return format_by_extension.get(extension, "table")
40
+
41
+
42
def flatten_dict(d: Dict[str, Any], parent_key: str = "", sep: str = ".") -> Dict[str, Any]:
    """Flatten nested dictionary into dot-notation keys.

    Nested dicts are expanded recursively ("a": {"b": 1} -> "a.b": 1);
    lists are kept as a single value rendered via str().

    Args:
        d: Dictionary to flatten
        parent_key: Parent key prefix
        sep: Separator for nested keys

    Returns:
        Flattened dictionary
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        full_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_dict(value, full_key, sep=sep))
        elif isinstance(value, list):
            # Lists are not expanded — show their string representation.
            flat[full_key] = str(value)
        else:
            flat[full_key] = value
    return flat
64
+
65
+
66
def format_table(records: List[Dict[str, Any]], limit: Optional[int] = None) -> str:
    """Format records as a simple aligned text table.

    Nested dictionaries are flattened to dot-notation columns; the column
    set is the union of keys across the displayed records, sorted for a
    stable layout. Column width fits the widest of the header and values.

    Args:
        records: List of records to format
        limit: Maximum number of records to display

    Returns:
        Formatted table string
    """
    if not records:
        return "No records found."

    # Remember the true total BEFORE truncating, and only mark the output
    # as limited when records were actually dropped. (Previously the
    # footer reported len() after slicing, always showing "N of N".)
    total = len(records)
    if limit and total > limit:
        records = records[:limit]
        limited = True
    else:
        limited = False

    # Flatten all records (convert nested dicts to dot-notation)
    flattened_records = [flatten_dict(record) for record in records]

    # Union of keys across flattened records, sorted for consistent display
    all_keys: set[str] = set()
    for record in flattened_records:
        all_keys.update(record.keys())
    keys = sorted(all_keys)

    # Calculate column widths (header width vs widest cell value)
    col_widths = {key: len(key) for key in keys}
    for record in flattened_records:
        for key in keys:
            col_widths[key] = max(col_widths[key], len(str(record.get(key, ""))))

    # Build table
    lines = []

    # Header
    header = " | ".join(key.ljust(col_widths[key]) for key in keys)
    lines.append(header)
    lines.append("-" * len(header))

    # Rows
    for record in flattened_records:
        row = " | ".join(str(record.get(key, "")).ljust(col_widths[key]) for key in keys)
        lines.append(row)

    result = "\n".join(lines)

    if limited:
        # Report the original total, not the truncated length.
        result += f"\n\n... (showing {limit} of {total} records)"

    return result
125
+
126
+
127
def format_stats(stats: Dict[str, Any]) -> str:
    """Render a stats result dictionary as human-readable text.

    Handles the three result shapes produced by TQL stats queries:
    simple_aggregation, multiple_aggregations, and grouped_aggregation.

    Args:
        stats: Stats dictionary

    Returns:
        Formatted stats string
    """
    lines = ["Statistics:", "-" * 50]

    stats_type = stats.get("type")

    if stats_type == "simple_aggregation":
        lines.append(f"{stats['function']}({stats['field']}): {stats['value']}")

    elif stats_type == "multiple_aggregations":
        lines.extend(f"{name}: {result}" for name, result in stats["results"].items())

    elif stats_type == "grouped_aggregation":
        group_fields = stats.get("group_by", [])
        lines.append(f"Grouped by: {', '.join(group_fields)}")
        lines.append("")

        for bucket in stats["results"]:
            key_str = ", ".join(f"{k}={v}" for k, v in bucket["key"].items())
            lines.append(f" [{key_str}] (count: {bucket.get('doc_count', 0)})")

            if "aggregations" in bucket:
                lines.extend(
                    f" {agg_key}: {agg_value}"
                    for agg_key, agg_value in bucket["aggregations"].items()
                )
            else:
                # Single aggregation result: every bucket entry other than
                # the group key and doc_count is an aggregation value.
                lines.extend(
                    f" {key}: {value}"
                    for key, value in bucket.items()
                    if key not in ["key", "doc_count"]
                )

    return "\n".join(lines)
168
+
169
+
170
def write_output(records: List[Dict[str, Any]], output_format: str, output_path: Optional[str], limit: Optional[int]):
    """Write records to output in specified format.

    Args:
        records: Records to write
        output_format: Format ('json', 'jsonl', 'table')
        output_path: Output file path (None for stdout)
        limit: Maximum records to output
    """
    if output_format == "table":
        # Pass the untruncated records through: the table formatter applies
        # the limit itself so its "showing N of M" footer can report the
        # true total. (Previously the records were pre-sliced here, so the
        # formatter could never see the original count.)
        output = format_table(records, limit)
    else:
        # Apply limit if specified
        if limit and len(records) > limit:
            records = records[:limit]
        if output_format == "json":
            output = json.dumps(records, indent=2, ensure_ascii=False)
        else:  # jsonl
            output = "\n".join(json.dumps(record, ensure_ascii=False) for record in records)

    # Write to file or stdout
    if output_path:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(output)
        print(f"Output written to {output_path}")
    else:
        print(output)
197
+
198
+
199
def _output_query_result(
    result: Dict[str, Any],
    output_format: str,
    output_path: Optional[str],
    limit: Optional[int],
    stats_only: bool,
) -> None:
    """Emit a TQL query result dict containing 'stats' and/or 'results'.

    Stats are always emitted (formatted for table output, serialized
    otherwise); plain result records are suppressed when stats_only is
    True. If the dict has neither key, an empty record set is written.
    """
    if "stats" in result:
        if output_format == "table":
            print(format_stats(result["stats"]))
        else:
            write_output([result["stats"]], output_format, output_path, limit)
    elif "results" in result:
        if not stats_only:
            write_output(result["results"], output_format, output_path, limit)
    else:
        write_output([], output_format, output_path, limit)


def main():  # noqa: C901
    """Main CLI entry point.

    Parses arguments, reads records from stdin, a single file, or a folder,
    executes the TQL query, and writes results or statistics in the selected
    output format.

    Fix: `--stats-only` was previously honored only in the folder branch;
    it now suppresses record output for stdin and single-file input too.

    Exit codes: 0 on success, 1 on error, 130 on keyboard interrupt.
    """
    parser = argparse.ArgumentParser(
        description="TQL - Tellaro Query Language CLI for querying structured data files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Query a JSON file
tql 'status = "active"' data.json

# Query with stats
tql 'status = "active" | stats count() by type' data.jsonl

# CSV with auto-detected headers
tql 'age > 25' users.csv

# Output to JSON file (auto-detects format from extension)
tql 'status = 200' logs.jsonl --output results.json

# Process folder with pattern
tql '| stats count() by status' logs/ --pattern "*.jsonl" --recursive

# Stdin to stdout
cat data.jsonl | tql 'score > 90'
""",
    )

    # Positional arguments
    parser.add_argument("query", help="TQL query string")
    parser.add_argument(
        "file_or_folder",
        nargs="?",
        help="Path to file or folder (defaults to stdin if not provided)",
    )

    # File options
    file_group = parser.add_argument_group("File Options")
    file_group.add_argument(
        "--format",
        choices=["json", "jsonl", "csv", "auto"],
        default="auto",
        help="Input file format (default: auto-detect from extension)",
    )
    file_group.add_argument(
        "--csv-delimiter",
        default=",",
        help="CSV delimiter character (default: ,)",
    )
    file_group.add_argument(
        "--csv-headers",
        help="Comma-separated CSV header names (overrides auto-detection)",
    )
    file_group.add_argument(
        "--no-header",
        action="store_true",
        help="CSV has no header row (generates column1, column2, etc.)",
    )
    file_group.add_argument(
        "--field-types",
        help='JSON string mapping field names to types (e.g., \'{"age":"integer"}\')',
    )
    file_group.add_argument(
        "--recursive",
        action="store_true",
        help="Process folders recursively",
    )
    file_group.add_argument(
        "--pattern",
        default="*",
        help="File pattern for folder processing (default: *)",
    )

    # Output options
    output_group = parser.add_argument_group("Output Options")
    output_group.add_argument(
        "--output",
        "-o",
        help="Output file path (default: stdout). Format auto-detected from extension.",
    )
    output_group.add_argument(
        "--output-format",
        choices=["json", "jsonl", "table"],
        help="Output format (default: smart - table for console, matches extension for files)",
    )
    output_group.add_argument(
        "--limit",
        "-n",
        type=int,
        help="Maximum number of records to output",
    )
    output_group.add_argument(
        "--stats-only",
        action="store_true",
        help="Only output statistics, no records",
    )

    # Performance options
    perf_group = parser.add_argument_group("Performance Options")
    perf_group.add_argument(
        "--parallel",
        type=int,
        default=4,
        help="Number of parallel workers for folder processing (default: 4)",
    )
    perf_group.add_argument(
        "--sample-size",
        type=int,
        default=100,
        help="Number of records to sample for type inference (default: 100)",
    )

    # Misc options
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Verbose output with progress information",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress informational messages",
    )

    args = parser.parse_args()

    # Parse field types if provided (JSON mapping of field name -> type name)
    field_types = None
    if args.field_types:
        try:
            field_types = json.loads(args.field_types)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON for --field-types: {e}", file=sys.stderr)
            sys.exit(1)

    # Parse CSV headers if provided
    csv_headers = None
    if args.csv_headers:
        csv_headers = [h.strip() for h in args.csv_headers.split(",")]

    # Detect output format (explicit flag wins, else inferred from --output)
    output_format = detect_output_format(args.output, args.output_format)

    # Initialize TQL
    try:
        tql = TQL()

        # Determine input source
        is_folder = args.file_or_folder and os.path.isdir(args.file_or_folder)
        is_file = args.file_or_folder and os.path.isfile(args.file_or_folder)
        is_stdin = not args.file_or_folder

        if is_stdin:
            # Read from stdin
            if not args.quiet:
                print("Reading from stdin...", file=sys.stderr)

            # Read all lines from stdin and parse as JSONL; malformed lines
            # are skipped with a warning rather than aborting the run.
            records = []
            for line in sys.stdin:
                line = line.strip()
                if line:
                    try:
                        records.append(json.loads(line))
                    except json.JSONDecodeError:
                        if not args.quiet:
                            print("Warning: Skipping invalid JSON line", file=sys.stderr)

            # Execute query
            result = tql.query(records, args.query)

            # Handle output (now honors --stats-only like the folder branch)
            _output_query_result(result, output_format, args.output, args.limit, args.stats_only)

        elif is_folder:
            # Process folder
            if args.verbose:
                print(f"Processing folder: {args.file_or_folder}", file=sys.stderr)

            result = tql.query_folder(
                args.file_or_folder,
                args.query,
                pattern=args.pattern,
                input_format=args.format,
                recursive=args.recursive,
                parallel=args.parallel,
                csv_delimiter=args.csv_delimiter,
                csv_headers=csv_headers,
                no_header=args.no_header,
                field_types=field_types,
                sample_size=args.sample_size,
            )

            if args.verbose and "files_processed" in result:
                print(f"Files processed: {result['files_processed']}", file=sys.stderr)

            # Handle output
            _output_query_result(result, output_format, args.output, args.limit, args.stats_only)

        elif is_file:
            # Check if query contains stats to pick the right execution path
            ast = tql.parse(args.query)
            has_stats = ast.get("type") in ["stats_expr", "query_with_stats"]

            if has_stats:
                # Use stats method
                if args.verbose:
                    print(f"Processing file with stats: {args.file_or_folder}", file=sys.stderr)

                result = tql.query_file_stats(
                    args.file_or_folder,
                    args.query,
                    input_format=args.format,
                    csv_delimiter=args.csv_delimiter,
                    csv_headers=csv_headers,
                    no_header=args.no_header,
                    field_types=field_types,
                    sample_size=args.sample_size,
                )

                # Format and output stats (result IS the stats dict here)
                if output_format == "table":
                    print(format_stats(result))
                else:
                    write_output([result], output_format, args.output, args.limit)

            else:
                # Use streaming method for filter queries
                if args.verbose:
                    print(f"Processing file: {args.file_or_folder}", file=sys.stderr)

                records = list(
                    tql.query_file_streaming(
                        args.file_or_folder,
                        args.query,
                        input_format=args.format,
                        csv_delimiter=args.csv_delimiter,
                        csv_headers=csv_headers,
                        no_header=args.no_header,
                        field_types=field_types,
                        sample_size=args.sample_size,
                    )
                )

                if args.verbose:
                    print(f"Matched {len(records)} records", file=sys.stderr)

                # Honor --stats-only here too (a filter query has no stats,
                # so suppressing records means emitting nothing).
                if not args.stats_only:
                    write_output(records, output_format, args.output, args.limit)

        else:
            print(f"Error: {args.file_or_folder} is not a valid file or folder", file=sys.stderr)
            sys.exit(1)

    except TQLError as e:
        print(f"TQL Error: {e}", file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        sys.exit(130)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        if args.verbose:
            import traceback

            traceback.print_exc()
        sys.exit(1)
481
+
482
+
483
# Allow direct execution (python tql/cli.py) in addition to the installed
# console-script entry point.
if __name__ == "__main__":
    main()