tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
tql/core.py ADDED
@@ -0,0 +1,929 @@
1
+ """Core TQL implementation.
2
+
3
+ This module provides the main TQL class that serves as the primary interface
4
+ for parsing and executing TQL queries against different backends.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional, Union
8
+
9
+ from .analyzer import EnhancedFieldMapping
10
+ from .core_components import FileOperations, OpenSearchOperations, StatsOperations, ValidationOperations
11
+ from .evaluator import TQLEvaluator
12
+ from .exceptions import TQLOperatorError, TQLParseError, TQLSyntaxError, TQLTypeError, TQLValidationError
13
+ from .mutator_analyzer import MutatorAnalysisResult
14
+ from .parser import TQLParser
15
+
16
+
17
class TQL:
    """Main TQL query interface.

    This class provides the primary interface for parsing TQL queries and
    executing them against various backends, including direct file
    operations and OpenSearch.

    Example:
        >>> tql = TQL()
        >>> query = "name eq 'John' AND age > 25"
        >>> results = tql.query(data, query)
    """

    # OpenSearch scalar type names. A mapping value that is one of these
    # strings describes the field's *type* rather than renaming the field,
    # so the field maps to itself. A frozenset makes the membership test
    # O(1) instead of scanning a list literal on every field.
    _OPENSEARCH_TYPES = frozenset(
        {"keyword", "text", "long", "integer", "short", "byte", "double", "float", "boolean", "date", "ip"}
    )

    def __init__(self, field_mappings: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None):
        """Initialize a TQL instance.

        Args:
            field_mappings: Optional mapping of TQL field names to backend
                field names. Supports multiple formats:

                1. Simple: {"field": "keyword"}
                2. Complex: {"field": {"field": "keyword", "field.text": "text"}}
                3. Enhanced with analyzer info:
                   {"field": {"field": "keyword",
                              "field.text": {"type": "text",
                                             "analyzer": {"tokenizer": {"type": "whitespace"},
                                                          "filters": ["lowercase"]}}}}
        """
        self.parser = TQLParser()
        self.evaluator = TQLEvaluator()
        self.field_mappings = field_mappings or {}

        # Wrap each mapping in an EnhancedFieldMapping for query optimization.
        self.enhanced_mappings = {
            name: EnhancedFieldMapping({name: config}) for name, config in self.field_mappings.items()
        }

        # True when at least one mapping carries analyzer information.
        self.has_analyzer_info = any(m.is_enhanced_mapping() for m in self.enhanced_mappings.values())

        # Derive the flat field-name mapping handed to the evaluator. For
        # complex (dict) mappings the first key is used as the target name.
        self._simple_mappings: Dict[str, str] = {}
        for name, value in self.field_mappings.items():
            if isinstance(value, str):
                # A bare OpenSearch type name is a type spec, not a rename:
                # the field maps to itself. Anything else is a rename target.
                self._simple_mappings[name] = name if value in self._OPENSEARCH_TYPES else value
            elif isinstance(value, dict) and value:
                if "type" in value and (len(value) == 1 or "fields" in value):
                    # Type-only spec, or an OpenSearch-style mapping with
                    # sub-"fields": the field keeps its own name.
                    self._simple_mappings[name] = name
                else:
                    # Complex mapping: use the first key as the simple target.
                    self._simple_mappings[name] = next(iter(value))
            else:
                # Empty/unrecognized config: default to mapping the field to itself.
                self._simple_mappings[name] = name

        # Component operations that implement the actual behaviors.
        self.opensearch_ops = OpenSearchOperations(self.parser, self.field_mappings, self.enhanced_mappings)
        self.file_ops = FileOperations()
        self.stats_ops = StatsOperations(self.parser, self._simple_mappings)
        self.validation_ops = ValidationOperations(self.parser, self.field_mappings)
115
def parse(self, query: str) -> Dict[str, Any]:
    """Parse a TQL query string into an abstract syntax tree.

    Args:
        query: TQL query string.

    Returns:
        The AST dictionary produced by the underlying parser.

    Raises:
        TQLParseError: If the query has invalid syntax.
    """
    # Delegate parsing entirely to the TQLParser instance.
    return self.parser.parse(query)
129
def validate(self, query: str, validate_fields: bool = False) -> bool:
    """Validate a TQL query's syntax and, optionally, its field names.

    Args:
        query: TQL query string.
        validate_fields: When True, also validate field names against the
            configured field mappings.

    Returns:
        True when the query is valid; False on syntax errors.

    Raises:
        TQLFieldError: If field validation fails.
        TQLValidationError: If type validation fails.
    """
    try:
        outcome = self.validation_ops.validate(query, validate_fields)
    except TQLSyntaxError:
        # A syntax error simply means the query is invalid.
        return False
    return outcome
149
+ def _ast_to_query_string(self, ast: Dict[str, Any]) -> str:
150
+ """Convert AST back to query string for display purposes."""
151
+ if ast.get("type") == "comparison":
152
+ field = ast.get("field", "")
153
+ op = ast.get("operator", "")
154
+ value = ast.get("value", "")
155
+ if isinstance(value, str):
156
+ value = f'"{value}"'
157
+ return f"{field} {op} {value}"
158
+ elif ast.get("type") == "logical_op":
159
+ left = self._ast_to_query_string(ast.get("left", {}))
160
+ right = self._ast_to_query_string(ast.get("right", {}))
161
+ op = ast.get("operator", "").upper()
162
+ return f"({left} {op} {right})"
163
+ elif ast.get("type") == "unary_op":
164
+ operand = self._ast_to_query_string(ast.get("operand", {}))
165
+ return f"NOT {operand}"
166
+ return str(ast)
167
+
168
def query(self, data: Union[List[Dict], str], query: str, save_enrichment: bool = False) -> List[Dict[str, Any]]:
    """Execute a TQL query against data and return the matching records.

    Args:
        data: A list of record dictionaries, or a path to a JSON/CSV file.
        query: TQL query string.
        save_enrichment: When True and mutators added enrichment fields,
            write the enriched records back to the source file.

    Returns:
        The records that match the query (possibly enriched by mutators).

    Raises:
        TQLParseError: If query parsing fails.
        TQLExecutionError: If query execution fails.
    """
    ast = self.parse(query)

    # Resolve the data source: a string argument is treated as a file path.
    if isinstance(data, str):
        source_file = data
        records = self.file_ops.load_file(data)
    else:
        source_file = None
        records = data

    matches: List[Dict[str, Any]] = []
    saw_enrichment = False
    for record in records:
        if not self.evaluator._evaluate_node(ast, record, self._simple_mappings):
            continue
        enriched = self._apply_mutators_to_record(ast, record)
        matches.append(enriched)
        # A different object identity signals that mutators enriched the record.
        saw_enrichment = saw_enrichment or enriched is not record

    # Persist enrichments for file-backed sources when requested. Every
    # record (not just the matches) is re-enriched before saving.
    if save_enrichment and saw_enrichment and source_file:
        everything = [self._apply_mutators_to_record(ast, record) for record in records]
        if source_file.lower().endswith(".json"):
            self.file_ops.save_enrichments_to_json(source_file, everything)

    return matches
226
def query_single(self, record: Dict[str, Any], query: str) -> bool:
    """Check whether one record matches a TQL query.

    Args:
        record: Dictionary to test.
        query: TQL query string.

    Returns:
        True if the record matches the query.

    Raises:
        TQLParseError: If query parsing fails.
    """
    # Parse then evaluate against the single record in one step.
    return self.evaluator._evaluate_node(self.parse(query), record, self._simple_mappings)
242
def to_opensearch(self, query: str) -> Dict[str, Any]:
    """Convert a TQL query into an OpenSearch query dictionary.

    Args:
        query: TQL query string.

    Returns:
        OpenSearch query dictionary.

    Raises:
        TQLParseError: If query parsing fails.
    """
    # Conversion is implemented by the OpenSearch operations component.
    return self.opensearch_ops.to_opensearch(query)
256
def to_opensearch_dsl(self, query: str) -> Dict[str, Any]:
    """Convert a TQL query to OpenSearch DSL format.

    Alias of ``to_opensearch`` kept for backward compatibility.

    Args:
        query: TQL query string.

    Returns:
        OpenSearch DSL query dictionary.
    """
    return self.opensearch_ops.to_opensearch_dsl(query)
269
def analyze_query(self, query: str, context: str = "in_memory") -> Dict[str, Any]:  # noqa: C901
    """Analyze a TQL query for structure, complexity, and potential issues.

    Args:
        query: TQL query string
        context: Execution context ("in_memory" or "opensearch")

    Returns:
        Dictionary containing analysis results including:
        - ast: The parsed AST
        - stats: Query statistics (fields used, operators, etc.)
        - complexity: Query complexity metrics
        - warnings: Potential issues or optimizations
        - health: Overall query health assessment
        - mutator_health: Mutator impact details (only when mutators exist)
    """
    # Parse the query
    ast = self.parse(query)

    # Collect basic statistics; sets collect unique names during traversal
    # and are converted to sorted lists afterwards for JSON serialization.
    stats: Dict[str, Any] = {
        "fields": set(),
        "operators": set(),
        "logical_operators": set(),
        "has_mutators": False,
        "has_type_hints": False,
        "depth": 0,
    }

    def traverse_ast(node, depth=0):
        # Depth-first walk accumulating into the enclosing `stats` dict.
        if isinstance(node, dict):
            stats["depth"] = max(stats["depth"], depth)
            node_type = node.get("type")

            if node_type == "comparison":
                if "field" in node:
                    stats["fields"].add(node["field"])
                if "operator" in node:
                    stats["operators"].add(node["operator"])
                if node.get("type_hint"):
                    stats["has_type_hints"] = True
                if node.get("field_mutators") or node.get("value_mutators"):
                    stats["has_mutators"] = True
            elif node_type == "logical_op":
                stats["logical_operators"].add(node.get("operator"))
                traverse_ast(node.get("left"), depth + 1)
                traverse_ast(node.get("right"), depth + 1)
            elif node_type == "unary_op":
                stats["logical_operators"].add("not")
                traverse_ast(node.get("operand"), depth + 1)

    traverse_ast(ast)

    # Convert sets to lists for JSON serialization
    stats["fields"] = sorted(stats["fields"])
    stats["operators"] = sorted(stats["operators"])
    stats["logical_operators"] = sorted(stats["logical_operators"])

    # Check for warnings and performance issues
    warnings = self.validation_ops.check_performance_issues(ast, query)

    # Assess overall health: start from 100 and subtract fixed penalties.
    health_score = 100
    health_reasons = []

    if stats["depth"] > 5:
        health_score -= 20
        health_reasons.append("Query is deeply nested (depth > 5)")

    if len(stats["fields"]) > 10:
        health_score -= 10
        health_reasons.append("Query uses many fields (>10)")

    for warning in warnings:
        if warning["severity"] == "error":
            health_score -= 20
        elif warning["severity"] == "warning":
            health_score -= 10

    # If there are mutators, analyze their performance impact
    mutator_health_info = None
    if stats["has_mutators"]:
        # Use MutatorAnalyzer to evaluate mutator performance.
        # NOTE(review): imported locally, presumably to avoid a circular
        # import at module load time -- confirm against mutator_analyzer.
        from .mutator_analyzer import MutatorAnalyzer

        analyzer = MutatorAnalyzer(self.field_mappings)
        mutator_analysis = analyzer.analyze_ast(ast, context=context)

        # Get the context-specific health evaluation
        mutator_health_info = {
            "health_status": mutator_analysis.health_status,
            "health_reasons": mutator_analysis.health_reasons,
            "post_processing_required": len(mutator_analysis.post_processing_requirements) > 0,
        }

        # Adjust overall health based on mutator analysis
        if mutator_analysis.health_status == "red":
            health_score -= 30
            health_reasons.extend([r["reason"] for r in mutator_analysis.health_reasons if r["status"] == "red"])
        elif mutator_analysis.health_status == "yellow":
            health_score -= 15
            health_reasons.extend([r["reason"] for r in mutator_analysis.health_reasons if r["status"] == "yellow"])

    # Bucket the numeric score into a coarse three-level status.
    health_status = "good" if health_score >= 80 else "fair" if health_score >= 60 else "poor"

    result = {
        "query": query,
        "ast": ast,
        "stats": stats,
        "complexity": {
            "depth": stats["depth"],
            "field_count": len(stats["fields"]),
            "operator_count": len(stats["operators"]) + len(stats["logical_operators"]),
        },
        "warnings": warnings,
        "health": {"status": health_status, "score": health_score, "reasons": health_reasons},
    }

    # Add mutator health info if present
    if mutator_health_info:
        result["mutator_health"] = mutator_health_info

    return result
392
def execute_opensearch(  # noqa: C901
    self,
    opensearch_client: Any = None,
    index: Optional[str] = None,
    query: Optional[str] = None,
    size: int = 500,
    from_: int = 0,
    timestamp_field: str = "@timestamp",
    time_range: Optional[Dict[str, str]] = None,
    scan_all: bool = False,
    scroll_size: int = 1000,
    scroll_timeout: str = "5m",
    save_enrichment: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """Execute a TQL query against OpenSearch with post-processing support.

    Maintains backward compatibility with the old calling convention
    (client first) while delegating to the new OpenSearchOperations
    implementation, which accepts the query first.

    Args:
        opensearch_client: OpenSearch client instance, or (new style) the
            query string itself when passed positionally.
        index: Index name to search.
        query: The TQL query string (old style).
        size: Number of results to return (default: 500).
        from_: Starting offset for pagination (default: 0).
        timestamp_field: Field name for timestamp filtering (default: "@timestamp").
        time_range: Optional time range dict with 'gte' and/or 'lte' keys.
        scan_all: If True, use the scroll API to retrieve all matches.
        scroll_size: Size per scroll when scan_all=True (default: 1000).
        scroll_timeout: Scroll timeout when scan_all=True (default: "5m").
        save_enrichment: Accepted for backward compatibility; it is
            stripped from kwargs and not forwarded to the new
            implementation -- TODO confirm intended behavior.

    Returns:
        Dictionary with results, total, post_processing_applied,
        health_status, health_reasons, performance_impact, scan_info, and
        optimizations_applied keys.

    Raises:
        ValueError: If no query string is supplied.
        TQLExecutionError: If query execution fails.
    """
    # Handle both old and new calling conventions.
    if isinstance(opensearch_client, str) and query is None:
        # New style: execute_opensearch(query, index=index, ...).
        query = opensearch_client
        # A real client may still arrive via kwargs (used by test mocking).
        if "opensearch_client" in kwargs:
            opensearch_client = kwargs["opensearch_client"]
    elif query is None:
        raise ValueError("Query parameter is required")

    # Drop parameters the new implementation doesn't understand.
    filtered_kwargs = {k: v for k, v in kwargs.items() if k not in ("save_enrichment", "opensearch_client")}

    # Forward the supported keyword parameters explicitly.
    filtered_kwargs.update(
        {
            "timestamp_field": timestamp_field,
            "time_range": time_range,
            "scan_all": scan_all,
            "scroll_size": scroll_size,
            "scroll_timeout": scroll_timeout,
        }
    )

    # Pass along a genuine client object if one was provided (e.g. a mock).
    if opensearch_client is not None and not isinstance(opensearch_client, str):
        filtered_kwargs["client"] = opensearch_client

    # Execute using the new implementation.
    results = self.opensearch_ops.execute_opensearch(query, index=index, size=size, from_=from_, **filtered_kwargs)

    if isinstance(results, list):
        # Legacy list format: wrap into the dict contract. Determine whether
        # post-processing was needed by analyzing the query (best effort).
        post_processing_applied = False
        try:
            analysis = self.analyze_query(query, context="opensearch")
            if "mutator_health" in analysis and analysis["mutator_health"].get("post_processing_required"):
                post_processing_applied = True
        except Exception:
            # Analysis failures only affect the reported health status.
            pass

        if post_processing_applied:
            health_status = "yellow"
            health_reasons = ["Post-processing required - results may be incomplete with pagination"]
        else:
            health_status = "green"
            health_reasons = []

        return {
            "results": results,
            "total": len(results),
            "post_processing_applied": post_processing_applied,
            "health_status": health_status,
            "health_reasons": health_reasons,
            "performance_impact": {"overhead_ms": 0, "mutators_applied": 0},
            "scan_info": {"used_scan": False},
            "optimizations_applied": [],
        }

    # Already a dict: ensure backward-compatibility keys exist.
    results.setdefault("scan_info", {"used_scan": False})
    results.setdefault("optimizations_applied", [])
    return results
509
def evaluate(self, query: str) -> Dict[str, Any]:  # noqa: C901
    """Evaluate a TQL query for validation, health status, and field mapping information.

    This method validates the query and returns comprehensive information including
    health status, validation results, and field mappings. It never raises for
    a bad query; all problems are reported through the returned structure.

    Args:
        query: TQL query string

    Returns:
        Dictionary containing:
        - is_valid: Whether the query is syntactically valid
        - errors: List of validation errors with type, message, field, position
        - fields: Dictionary mapping field names to their mappings
        - health: Health status ('green', 'yellow', or 'red')
        - health_reasons: List of health issues with status, query_part, and reason
    """
    # Initialize result assuming the best; checks below downgrade it.
    result: Dict[str, Any] = {"is_valid": True, "errors": [], "fields": {}, "health": "green", "health_reasons": []}

    # Handle empty query
    if not query or not query.strip():
        result["is_valid"] = False
        result["errors"].append({"type": "TQLSyntaxError", "message": "Empty query", "position": 0})
        return result

    try:
        # First try to parse the query
        ast = self.parse(query)

        # Extract field information from AST
        fields = {}
        field_names = self._extract_fields_from_ast(ast)

        # Process field mappings and check for unknown fields
        for field in field_names:
            if field in self.field_mappings:
                # Get the mapped field name(s)
                mapping = self.field_mappings[field]
                if isinstance(mapping, str):
                    fields[field] = [mapping]
                elif isinstance(mapping, dict):
                    # Handle intelligent mappings
                    mapped_fields = []
                    for key, value in mapping.items():
                        if isinstance(value, dict) and "type" in value:
                            # This is a field mapping with type info
                            mapped_fields.append(key)
                        elif isinstance(value, str):
                            # Direct mapping
                            mapped_fields.append(key)
                    fields[field] = mapped_fields if mapped_fields else [field]
                else:
                    fields[field] = []
            else:
                # No mapping found
                fields[field] = []
                if self.field_mappings:  # Only flag as error if mappings are defined
                    result["is_valid"] = False
                    # Get available fields for suggestions
                    available_fields = sorted(self.field_mappings.keys())
                    error_msg = f"Unknown field '{field}'. Available fields: {', '.join(available_fields[:5])}"
                    if len(available_fields) > 5:
                        error_msg += f" and {len(available_fields) - 5} more"

                    result["errors"].append(
                        {
                            "type": "TQLFieldError",
                            "message": error_msg,
                            "field": field,
                            # First occurrence of the field name in the query text.
                            "position": query.find(field),
                        }
                    )

                    # Update health status
                    result["health"] = "red"
                    result["health_reasons"].append(
                        {"status": "red", "query_part": field, "reason": f"Unknown field '{field}'"}
                    )

        result["fields"] = fields

        # Check for type compatibility if mappings are provided
        if self.field_mappings:
            try:
                # Validate types
                self.validation_ops._check_type_compatibility(ast)

                # Check for performance issues
                warnings = self.validation_ops.check_performance_issues(ast, query)

                # Process warnings: errors force red; warnings only promote
                # green to yellow (never downgrade an existing red).
                for warning in warnings:
                    if warning["severity"] == "error":
                        result["health"] = "red"
                        result["health_reasons"].append(
                            {
                                "status": "red",
                                "query_part": warning.get("query_part", warning.get("field", query[:20] + "...")),
                                "reason": warning["message"],
                            }
                        )
                    elif warning["severity"] == "warning":
                        if result["health"] == "green":
                            result["health"] = "yellow"
                        result["health_reasons"].append(
                            {
                                "status": "yellow",
                                "query_part": warning.get("query_part", warning.get("field", query[:20] + "...")),
                                "reason": warning["message"],
                            }
                        )

            except (TQLValidationError, TQLTypeError) as e:
                # Type validation error
                result["is_valid"] = False
                error_msg = str(e)
                error_field: Optional[str] = getattr(e, "field", None)
                error_operator: Optional[str] = getattr(e, "operator", None)

                result["errors"].append(
                    {
                        "type": "TQLTypeError",
                        "message": error_msg,
                        "field": error_field,
                        "operator": error_operator,
                        # Best-effort position: locate "field operator" in the text.
                        "position": (
                            query.find(f"{error_field} {error_operator}")
                            if error_field and error_operator
                            else None
                        ),
                    }
                )

                # Add to health_reasons
                query_part = (
                    f"{error_field} {error_operator} ..." if error_field and error_operator else query[:20] + "..."
                )
                result["health"] = "red"
                result["health_reasons"].append({"status": "red", "query_part": query_part, "reason": error_msg})

    except TQLOperatorError as e:
        # Operator usage error
        result["is_valid"] = False
        result["errors"].append(
            {"type": "TQLOperatorError", "message": str(e), "position": getattr(e, "position", None)}
        )

    except TQLSyntaxError as e:
        # Syntax errors
        result["is_valid"] = False
        position = getattr(e, "position", None)
        # suggestions = getattr(e, "suggestions", [])  # Reserved for future use

        # Try to extract field from syntax error
        field = None
        try:
            # Heuristic: the first token before the error position is
            # presumably the field name -- best effort only.
            partial_query = query[:position] if position else query
            if " " in partial_query:
                field = partial_query.split()[0]
        except Exception:
            # Ignore errors when trying to extract field from partial query
            pass

        error_entry = {"type": "TQLSyntaxError", "message": str(e), "position": position}

        # Add field to errors if we could extract it
        if field:
            result["fields"][field] = []

        result["errors"].append(error_entry)
        result["health"] = "red"

        # Add to health_reasons
        result["health_reasons"].append(
            {"status": "red", "query_part": query[:20] + "..." if len(query) > 20 else query, "reason": str(e)}
        )

    except TQLParseError as e:
        # General parsing errors
        result["is_valid"] = False
        result["errors"].append(
            {"type": "TQLParseError", "message": str(e), "position": getattr(e, "position", None)}
        )
        result["health"] = "red"
        result["health_reasons"].append(
            {"status": "red", "query_part": query[:20] + "..." if len(query) > 20 else query, "reason": str(e)}
        )

    except Exception as e:
        # Unexpected errors: report under the concrete exception class name.
        result["is_valid"] = False
        result["errors"].append({"type": type(e).__name__, "message": str(e), "position": None})
        result["health"] = "red"
        result["health_reasons"].append(
            {"status": "red", "query_part": query[:20] + "..." if len(query) > 20 else query, "reason": str(e)}
        )

    return result
710
+ def _extract_fields_from_ast(self, ast: Dict[str, Any]) -> List[str]: # noqa: C901
711
+ """Extract all field names from an AST recursively."""
712
+ fields = []
713
+
714
+ if isinstance(ast, dict):
715
+ node_type = ast.get("type")
716
+
717
+ if node_type == "comparison":
718
+ field = ast.get("field")
719
+ if field:
720
+ fields.append(field)
721
+
722
+ elif node_type == "collection_op":
723
+ field = ast.get("field")
724
+ if field:
725
+ fields.append(field)
726
+
727
+ elif node_type == "logical_op":
728
+ # Recursively extract from both sides
729
+ fields.extend(self._extract_fields_from_ast(ast.get("left", {})))
730
+ fields.extend(self._extract_fields_from_ast(ast.get("right", {})))
731
+
732
+ elif node_type == "unary_op":
733
+ # Recursively extract from operand
734
+ fields.extend(self._extract_fields_from_ast(ast.get("operand", {})))
735
+
736
+ elif node_type == "mutator":
737
+ # Extract from source
738
+ fields.extend(self._extract_fields_from_ast(ast.get("source", {})))
739
+
740
+ elif node_type == "query":
741
+ # Extract from filter
742
+ if "filter" in ast:
743
+ fields.extend(self._extract_fields_from_ast(ast["filter"]))
744
+
745
+ return list(set(fields)) # Remove duplicates
746
+
747
def analyze(self, query: str) -> Dict[str, Any]:
    """Analyze a TQL query and return detailed execution information.

    Parses the query and reports how it would run, without executing it
    against any data.

    Args:
        query: TQL query string.

    Returns:
        Dictionary containing:
        - query: The original query string
        - ast: The parsed AST
        - opensearch: The OpenSearch query (None if conversion failed)
        - explanation: Human-readable rendering of the AST
        - field_mappings: Field mappings referenced by the query
    """
    ast = self.parse(query)

    # Conversion to OpenSearch is best-effort; None signals "not convertible".
    try:
        opensearch_query = self.to_opensearch(query)
    except Exception:
        opensearch_query = None

    # Only report the mappings that this query actually references.
    used_mappings = {
        field: self.field_mappings[field]
        for field in self.extract_fields(query)
        if field in self.field_mappings
    }

    return {
        "query": query,
        "ast": ast,
        "opensearch": opensearch_query,
        "explanation": self._ast_to_query_string(ast),
        "field_mappings": used_mappings,
    }
790
def explain(self, query: str) -> Dict[str, Any]:
    """Explain how a TQL query will be executed.

    Builds on :meth:`analyze` and adds the execution strategy: which
    backend would run the query and which optimizations apply.

    Args:
        query: TQL query string.

    Returns:
        Detailed execution plan.
    """
    plan = self.analyze(query)

    optimizations: List[str] = []
    # Enhanced mappings with analyzer info enable smarter query planning.
    if self.has_analyzer_info:
        optimizations.append("Using enhanced field mappings with analyzer information")

    plan["execution_strategy"] = {
        "backend": "opensearch" if plan["opensearch"] else "in-memory",
        "optimizations": optimizations,
    }
    return plan
818
def explain_optimization(self, query: str) -> Dict[str, Any]:
    """Explain query optimizations for OpenSearch execution.

    Shows how mutators are split between Phase 1 (pushed into OpenSearch)
    and Phase 2 (post-processing applied to the returned hits).

    Args:
        query: TQL query string.

    Returns:
        Optimization explanation including a phase breakdown when the
        query contains mutators.
    """
    # Delegate analysis to the backward-compatible analyzer entry point.
    analysis = self.opensearch_ops.analyze_opensearch_query(query)

    if not isinstance(analysis, MutatorAnalysisResult):
        # No mutators: the whole query runs inside OpenSearch.
        return {
            "query": query,
            "has_mutators": False,
            "opensearch_query": analysis["opensearch_query"],
            "optimizations": analysis["optimizations"],
            "notes": ["Query can be fully executed in OpenSearch without post-processing"],
        }

    # Mutators present: render the optimized AST and gather the mutators
    # that must run as post-processing.
    optimized_query = self._ast_to_query_string(analysis.optimized_ast)

    post_processing_mutators = []
    for requirement in analysis.post_processing_requirements:
        post_processing_mutators.extend(requirement.mutators)

    return {
        "query": query,
        "has_mutators": True,
        "phase1": {
            "description": "OpenSearch query (with optimizations applied)",
            "query": optimized_query,
            "optimizations": analysis.optimizations_applied,
        },
        "phase2": {
            "description": "Post-processing filters and enrichments",
            "requirements": analysis.post_processing_requirements,
            "mutators": post_processing_mutators,
        },
        "health": {"status": analysis.health_status, "reasons": analysis.health_reasons},
    }
866
def extract_fields(self, query: str) -> List[str]:
    """Extract all unique field names referenced in a TQL query.

    Args:
        query: TQL query string.

    Returns:
        Sorted list of unique field names.

    Raises:
        TQLParseError: If query parsing fails.
    """
    # The parser owns field extraction; delegate to it.
    return self.parser.extract_fields(query)
880
def stats(self, data: Union[List[Dict], str], stats_query: str) -> Dict[str, Any]:
    """Execute a statistics query on data.

    Args:
        data: List of records or a file path.
        stats_query: Stats query string (e.g., "| stats count() by status").

    Returns:
        Dictionary containing aggregation results.
    """
    return self.stats_ops.stats(data, stats_query)
892
def query_stats(self, data: Union[List[Dict], str], query: str) -> Dict[str, Any]:
    """Execute a TQL query combined with a stats aggregation.

    Filtering and statistical aggregation happen in a single query.

    Args:
        data: List of records or a file path.
        query: Combined query string
            (e.g., "status = 'active' | stats count() by type").

    Returns:
        Dictionary containing aggregation results.
    """
    return self.stats_ops.query_stats(data, query)
906
def analyze_stats_query(self, query: str) -> Dict[str, Any]:
    """Analyze a stats query for performance and correctness.

    Args:
        query: Stats query string.

    Returns:
        Analysis results including the AST and any warnings.
    """
    return self.stats_ops.analyze_stats_query(query)
917
+ def _apply_mutators_to_record(self, ast: Dict[str, Any], record: Dict[str, Any]) -> Dict[str, Any]:
918
+ """Apply any mutators in the AST to enrich the record.
919
+
920
+ Args:
921
+ ast: Query AST that may contain mutators
922
+ record: Record to enrich
923
+
924
+ Returns:
925
+ Enriched record (may be same as input if no enrichments)
926
+ """
927
+ # For now, return the original record
928
+ # TODO: Implement mutator application for enrichment # noqa: W0511
929
+ return record