tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Core Components
|
|
2
|
+
|
|
3
|
+
This package contains the modular components that implement TQL's core functionality.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The core components package splits the TQL core functionality into specialized modules:
|
|
8
|
+
|
|
9
|
+
### Components
|
|
10
|
+
|
|
11
|
+
#### `opensearch_operations.py` - OpenSearch Operations
|
|
12
|
+
Handles all OpenSearch-specific functionality:
|
|
13
|
+
- Query conversion to OpenSearch DSL
|
|
14
|
+
- Query execution against OpenSearch clusters
|
|
15
|
+
- Mutator analysis and optimization
|
|
16
|
+
- Phase 1/Phase 2 query splitting for optimal performance
|
|
17
|
+
- Result post-processing
|
|
18
|
+
|
|
19
|
+
**Key Methods:**
|
|
20
|
+
- `to_opensearch()` - Convert TQL to OpenSearch query
|
|
21
|
+
- `execute_opensearch()` - Execute query and return results
|
|
22
|
+
- `analyze_opensearch_query()` - Analyze query optimization opportunities (internal use)
|
|
23
|
+
|
|
24
|
+
#### `file_operations.py` - File I/O Operations
|
|
25
|
+
Manages file loading and saving:
|
|
26
|
+
- JSON file support with pretty printing
|
|
27
|
+
- CSV file support (read-only)
|
|
28
|
+
- Enrichment saving back to source files
|
|
29
|
+
- Automatic file type detection
|
|
30
|
+
|
|
31
|
+
**Key Methods:**
|
|
32
|
+
- `load_file()` - Load data from JSON/CSV files
|
|
33
|
+
- `save_enrichments_to_json()` - Save enriched data back to JSON
|
|
34
|
+
|
|
35
|
+
#### `stats_operations.py` - Statistics Operations
|
|
36
|
+
Implements statistical aggregations:
|
|
37
|
+
- Aggregation functions (count, sum, avg, min, max, etc.)
|
|
38
|
+
- Group-by operations
|
|
39
|
+
- Combined filter and stats queries
|
|
40
|
+
- Stats query analysis
|
|
41
|
+
|
|
42
|
+
**Key Methods:**
|
|
43
|
+
- `stats()` - Execute stats-only queries
|
|
44
|
+
- `query_stats()` - Execute combined filter + stats queries
|
|
45
|
+
- `analyze_stats_query()` - Analyze stats query for issues
|
|
46
|
+
|
|
47
|
+
#### `validation_operations.py` - Query Validation
|
|
48
|
+
Provides comprehensive query validation:
|
|
49
|
+
- Syntax validation via parsing
|
|
50
|
+
- Field name validation against mappings
|
|
51
|
+
- Type compatibility checking
|
|
52
|
+
- Performance issue detection
|
|
53
|
+
- Query complexity analysis
|
|
54
|
+
|
|
55
|
+
**Key Methods:**
|
|
56
|
+
- `validate()` - Validate query syntax and fields
|
|
57
|
+
- `check_type_compatibility()` - Verify operator/field type compatibility
|
|
58
|
+
- `check_performance_issues()` - Identify potential performance problems
|
|
59
|
+
|
|
60
|
+
## Usage
|
|
61
|
+
|
|
62
|
+
These components are used internally by the main `TQL` class. They should not be imported directly:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# Don't do this:
|
|
66
|
+
from tql.core_components.opensearch_operations import OpenSearchOperations
|
|
67
|
+
|
|
68
|
+
# Do this instead:
|
|
69
|
+
from tql import TQL
|
|
70
|
+
tql = TQL()
|
|
71
|
+
results = tql.execute_opensearch("status = 'active'", index="logs")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Architecture
|
|
75
|
+
|
|
76
|
+
The core follows a modular architecture:
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
TQL (main class)
|
|
80
|
+
├── OpenSearchOperations (OpenSearch integration)
|
|
81
|
+
├── FileOperations (file I/O)
|
|
82
|
+
├── StatsOperations (aggregations)
|
|
83
|
+
└── ValidationOperations (validation)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Design Principles
|
|
87
|
+
|
|
88
|
+
1. **Separation of Concerns**: Each component handles a specific domain
|
|
89
|
+
2. **Dependency Injection**: Components receive dependencies via constructor
|
|
90
|
+
3. **Stateless Operations**: Methods are mostly stateless for better testability
|
|
91
|
+
4. **Error Propagation**: Components raise specific TQL exceptions
|
|
92
|
+
5. **Type Safety**: Full type hints for better IDE support
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Core components package for TQL.
|
|
2
|
+
|
|
3
|
+
This package organizes TQL core functionality into logical modules:
|
|
4
|
+
- opensearch_operations: OpenSearch query conversion and execution
|
|
5
|
+
- file_operations: File loading and saving
|
|
6
|
+
- stats_operations: Statistical aggregations
|
|
7
|
+
- validation_operations: Query validation and type checking
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .file_operations import FileOperations
|
|
11
|
+
from .opensearch_operations import OpenSearchOperations
|
|
12
|
+
from .stats_operations import StatsOperations
|
|
13
|
+
from .validation_operations import ValidationOperations
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"OpenSearchOperations",
|
|
17
|
+
"FileOperations",
|
|
18
|
+
"StatsOperations",
|
|
19
|
+
"ValidationOperations",
|
|
20
|
+
]
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""File operations for TQL.
|
|
2
|
+
|
|
3
|
+
This module handles loading data from files and saving enrichments back to files.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import csv
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
from typing import Any, Dict, List
|
|
10
|
+
|
|
11
|
+
from ..exceptions import TQLExecutionError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FileOperations:
    """Handles file-based operations for TQL.

    Provides loading of record data from local files, saving enriched
    records back to JSON, and bulk-updating enriched records in OpenSearch.
    """

    def load_file(self, file_path: str) -> List[Dict[str, Any]]:  # noqa: C901
        """Load data from a file (JSON, JSON Lines, or CSV).

        Supported extensions (case-insensitive): ``.json``, ``.jsonl``,
        ``.csv``.

        Args:
            file_path: Path to the file to load.

        Returns:
            List of dictionaries representing the data. A JSON file holding
            a single object is wrapped in a one-element list. CSV rows are
            returned as dicts with all values as strings (csv.DictReader
            behavior).

        Raises:
            TQLExecutionError: If the file is missing, has an unsupported
                extension, contains invalid content, or cannot be read.
        """
        if not os.path.exists(file_path):
            raise TQLExecutionError(f"File not found: {file_path}")

        _, ext = os.path.splitext(file_path.lower())

        try:
            if ext == ".json":
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    # Single object: normalize to a one-element list.
                    return [data]
                elif isinstance(data, list):
                    return data
                else:
                    # Top-level scalar (string/number/etc.) is not tabular data.
                    raise TQLExecutionError(f"Invalid JSON format in {file_path}")
            elif ext == ".csv":
                with open(file_path, "r", encoding="utf-8") as f:
                    reader = csv.DictReader(f)
                    return list(reader)
            elif ext == ".jsonl":
                # JSON Lines format - one JSON object per line.
                records = []
                with open(file_path, "r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if line:  # Skip empty lines
                            records.append(json.loads(line))
                return records
            else:
                raise TQLExecutionError(f"Unsupported file format: {ext}")
        except TQLExecutionError:
            # Bug fix: our own specific errors (invalid format, unsupported
            # extension) were previously caught by the broad handler below
            # and re-wrapped with a second, less informative message.
            # Re-raise them unchanged instead.
            raise
        except Exception as e:
            # I/O and parse failures (OSError, JSONDecodeError, csv.Error, ...)
            # are surfaced as a TQL-level error, chained for debuggability.
            raise TQLExecutionError(f"Error loading file {file_path}: {str(e)}") from e

    def save_enrichments_to_json(self, file_path: str, records: List[Dict[str, Any]]) -> None:
        """Save enriched records back to a JSON file.

        The file is overwritten with a pretty-printed (indent=2) UTF-8 JSON
        array; non-ASCII characters are written verbatim.

        Args:
            file_path: Path to save the file.
            records: List of records to save.
        """
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=2, ensure_ascii=False)

    def bulk_update_opensearch(
        self, client: Any, index: str, records: List[Dict[str, Any]], id_field: str = "_id", batch_size: int = 100
    ) -> Dict[str, int]:
        """Bulk update records in OpenSearch.

        Records missing ``id_field`` are silently skipped (they cannot be
        addressed as update targets). The ``id_field`` itself is stripped
        from the partial document sent to OpenSearch.

        Args:
            client: OpenSearch client instance.
            index: Index name.
            records: Records to update.
            id_field: Field containing the document ID.
            batch_size: Number of documents per bulk request.

        Returns:
            Dictionary with update statistics: ``{"updated": int, "failed": int}``.
        """
        # Imported lazily so opensearchpy stays an optional dependency for
        # purely file-based usage.
        from opensearchpy.helpers import bulk as opensearch_bulk

        updated = 0
        failed = 0

        # Process in batches to bound the size of each bulk request.
        for i in range(0, len(records), batch_size):
            batch = records[i : i + batch_size]
            actions = []

            for record in batch:
                if id_field in record:
                    actions.append(
                        {
                            "_op_type": "update",
                            "_index": index,
                            "_id": record[id_field],
                            # Partial update document: everything except the ID field.
                            "doc": {k: v for k, v in record.items() if k != id_field},
                        }
                    )

            if actions:
                # raise_on_error=False: collect failures instead of aborting
                # the whole batch on the first bad document.
                success, failures = opensearch_bulk(client, actions, raise_on_error=False)
                updated += success
                failed += len(failures)

        return {"updated": updated, "failed": failed}