tellaro-query-language 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.2.5.dist-info/LICENSE +72 -0
- tellaro_query_language-0.2.5.dist-info/METADATA +806 -0
- {tellaro_query_language-0.2.3.dist-info → tellaro_query_language-0.2.5.dist-info}/RECORD +21 -18
- {tellaro_query_language-0.2.3.dist-info → tellaro_query_language-0.2.5.dist-info}/entry_points.txt +1 -0
- tql/cache/memory.py +1 -1
- tql/cli.py +484 -0
- tql/core.py +244 -5
- tql/evaluator.py +1 -1
- tql/evaluator_components/special_expressions.py +62 -10
- tql/evaluator_components/value_comparison.py +0 -4
- tql/exceptions.py +6 -4
- tql/field_type_inference.py +285 -0
- tql/mutators/geo.py +57 -20
- tql/opensearch_components/query_converter.py +1 -1
- tql/opensearch_stats.py +7 -6
- tql/parser.py +7 -3
- tql/post_processor.py +8 -4
- tql/scripts.py +3 -3
- tql/stats_evaluator.py +357 -5
- tql/streaming_file_processor.py +335 -0
- tellaro_query_language-0.2.3.dist-info/LICENSE +0 -21
- tellaro_query_language-0.2.3.dist-info/METADATA +0 -433
- {tellaro_query_language-0.2.3.dist-info → tellaro_query_language-0.2.5.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,806 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: tellaro-query-language
|
|
3
|
+
Version: 0.2.5
|
|
4
|
+
Summary: A flexible, human-friendly query language for searching and filtering structured data
|
|
5
|
+
Home-page: https://github.com/tellaro/tellaro-query-language
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Keywords: query,language,opensearch,elasticsearch,search,filter,tql
|
|
8
|
+
Author: Justin Henderson
|
|
9
|
+
Author-email: justin@tellaro.io
|
|
10
|
+
Requires-Python: >=3.11,<3.14
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: Other/Proprietary License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
20
|
+
Provides-Extra: opensearch
|
|
21
|
+
Requires-Dist: dnspython (>=2.7.0,<3.0.0)
|
|
22
|
+
Requires-Dist: maxminddb (>=2.7.0,<3.0.0)
|
|
23
|
+
Requires-Dist: opensearch-dsl (>=2.1.0,<3.0.0) ; extra == "opensearch"
|
|
24
|
+
Requires-Dist: opensearch-py (>=2.4.2,<3.0.0) ; extra == "opensearch"
|
|
25
|
+
Requires-Dist: pyparsing (>=3.2.1,<4.0.0)
|
|
26
|
+
Requires-Dist: setuptools (>=80.0.0,<81.0.0)
|
|
27
|
+
Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
|
|
28
|
+
Project-URL: Documentation, https://github.com/tellaro/tellaro-query-language/tree/main/docs
|
|
29
|
+
Project-URL: Repository, https://github.com/tellaro/tellaro-query-language
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# Tellaro Query Language (TQL)
|
|
33
|
+
|
|
34
|
+
[PyPI version](https://badge.fury.io/py/tellaro-query-language)
|
|
35
|
+
[Test results](./reports/pytest/junit.xml)
|
|
36
|
+
[Coverage report](./reports/coverage/index.html)
|
|
37
|
+
[Flake8 report](./reports/flake8/index.html)
|
|
38
|
+
[Python](https://www.python.org/)
|
|
39
|
+
[License](LICENSE)
|
|
40
|
+
|
|
41
|
+
**A flexible, human-friendly query language for searching and filtering structured data across files, databases, and search engines.**
|
|
42
|
+
|
|
43
|
+
TQL provides a unified, readable syntax for expressing complex queries that works seamlessly with:
|
|
44
|
+
- **Files**: Query JSON, JSONL, CSV files directly with CLI or Python API
|
|
45
|
+
- **OpenSearch/Elasticsearch**: Convert TQL to DSL queries automatically
|
|
46
|
+
- **In-Memory Data**: Filter Python dictionaries and lists
|
|
47
|
+
- **Statistical Analysis**: Built-in aggregations and grouping
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# Query JSON files directly
|
|
51
|
+
results = tql.query("logs.jsonl", "status = 200 AND response_time > 500")
|
|
52
|
+
|
|
53
|
+
# Query OpenSearch with automatic DSL translation
|
|
54
|
+
results = tql.execute_opensearch(client, "events-*",
|
|
55
|
+
"user.role = 'admin' AND timestamp > '2024-01-01'")
|
|
56
|
+
|
|
57
|
+
# Aggregate data with stats
|
|
58
|
+
results = tql.query("sales.json", "region = 'west' | stats sum(revenue) by product")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## 🚀 Quick Start
|
|
64
|
+
|
|
65
|
+
### Installation
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Install from PyPI (Python package)
|
|
69
|
+
pip install tellaro-query-language
|
|
70
|
+
|
|
71
|
+
# Install with OpenSearch support
|
|
72
|
+
pip install tellaro-query-language[opensearch]
|
|
73
|
+
|
|
74
|
+
# Or install Rust CLI (300x faster for large files)
|
|
75
|
+
cargo install tellaro-query-language
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Query Files with CLI
|
|
79
|
+
|
|
80
|
+
TQL includes a blazing-fast command-line interface for querying files:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Query JSON/JSONL files
|
|
84
|
+
tql 'status = "active"' users.json
|
|
85
|
+
tql 'age > 25 AND city = "NYC"' data.jsonl
|
|
86
|
+
|
|
87
|
+
# Query CSV files (auto-detects headers)
|
|
88
|
+
tql 'price > 100 AND category = "electronics"' products.csv
|
|
89
|
+
|
|
90
|
+
# Statistical aggregations
|
|
91
|
+
tql '| stats count() by status' events.jsonl
|
|
92
|
+
tql 'status = 200 | stats average(response_time) by endpoint' logs.jsonl
|
|
93
|
+
|
|
94
|
+
# Process folders recursively
|
|
95
|
+
tql 'level = "ERROR"' logs/ --pattern "*.jsonl" --recursive
|
|
96
|
+
|
|
97
|
+
# Pipe data from stdin
|
|
98
|
+
cat data.jsonl | tql 'score > 90'
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Performance**: The Rust CLI processes 50MB files in milliseconds vs. seconds for Python implementations.
|
|
102
|
+
|
|
103
|
+
### Query Files with Python API
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from tql import TQL
|
|
107
|
+
|
|
108
|
+
tql = TQL()
|
|
109
|
+
|
|
110
|
+
# Query JSON files directly
|
|
111
|
+
results = tql.query("data.json", "user.role = 'admin' AND status = 'active'")
|
|
112
|
+
|
|
113
|
+
# Query with field transformations
|
|
114
|
+
results = tql.query("logs.jsonl", "email | lowercase contains '@example.com'")
|
|
115
|
+
|
|
116
|
+
# Statistical analysis
|
|
117
|
+
results = tql.query("sales.json", "| stats sum(revenue), avg(price) by category")
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Query In-Memory Data
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from tql import TQL
|
|
124
|
+
|
|
125
|
+
tql = TQL()
|
|
126
|
+
data = [
|
|
127
|
+
{'name': 'Alice', 'age': 30, 'city': 'NYC'},
|
|
128
|
+
{'name': 'Bob', 'age': 25, 'city': 'LA'},
|
|
129
|
+
{'name': 'Charlie', 'age': 35, 'city': 'NYC'}
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
# Simple queries
|
|
133
|
+
results = tql.query(data, 'age > 27')
|
|
134
|
+
# Returns: [{'name': 'Alice', 'age': 30, 'city': 'NYC'},
|
|
135
|
+
# {'name': 'Charlie', 'age': 35, 'city': 'NYC'}]
|
|
136
|
+
|
|
137
|
+
# Logical operators
|
|
138
|
+
results = tql.query(data, 'age >= 30 AND city = "NYC"')
|
|
139
|
+
# Returns: [{'name': 'Alice', ...}, {'name': 'Charlie', ...}]
|
|
140
|
+
|
|
141
|
+
# Field transformations
|
|
142
|
+
results = tql.query(data, 'name | lowercase = "alice"')
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Query OpenSearch
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from opensearchpy import OpenSearch
|
|
149
|
+
from tql import TQL
|
|
150
|
+
|
|
151
|
+
# Initialize OpenSearch client
|
|
152
|
+
client = OpenSearch(
|
|
153
|
+
hosts=['localhost:9200'],
|
|
154
|
+
http_auth=('admin', 'admin'),
|
|
155
|
+
use_ssl=True,
|
|
156
|
+
verify_certs=False
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Initialize TQL with field mappings
|
|
160
|
+
mappings = {
|
|
161
|
+
'user.name': {'type': 'keyword'},
|
|
162
|
+
'user.email': {'type': 'text'},
|
|
163
|
+
'timestamp': {'type': 'date'}
|
|
164
|
+
}
|
|
165
|
+
tql = TQL(mappings)
|
|
166
|
+
|
|
167
|
+
# Execute queries with automatic DSL translation
|
|
168
|
+
results = tql.execute_opensearch(
|
|
169
|
+
opensearch_client=client,
|
|
170
|
+
index='users-*',
|
|
171
|
+
query='user.name = "admin" AND status = "active"'
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Complex queries with mutators and post-processing
|
|
175
|
+
results = tql.execute_opensearch(
|
|
176
|
+
opensearch_client=client,
|
|
177
|
+
index='logs-*',
|
|
178
|
+
query='email | lowercase contains "@example.com" AND level = "ERROR"'
|
|
179
|
+
)
|
|
180
|
+
# TQL automatically applies post-processing for mutators
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## 🎯 Core Features
|
|
186
|
+
|
|
187
|
+
### 🔍 **Unified Query Syntax**
|
|
188
|
+
Write one query, run it anywhere - files, OpenSearch, in-memory data:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# Same query works everywhere
|
|
192
|
+
query = 'status = "active" AND age > 25'
|
|
193
|
+
|
|
194
|
+
# Query files
|
|
195
|
+
tql.query("users.json", query)
|
|
196
|
+
|
|
197
|
+
# Query OpenSearch
|
|
198
|
+
tql.execute_opensearch(client, "users-*", query)
|
|
199
|
+
|
|
200
|
+
# Query Python data
|
|
201
|
+
tql.query(python_list, query)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 📁 **First-Class File Support**
|
|
205
|
+
Query files as easily as databases:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# JSON/JSONL files
|
|
209
|
+
tql.query("logs.jsonl", "level = 'ERROR'")
|
|
210
|
+
|
|
211
|
+
# CSV files with automatic header detection
|
|
212
|
+
tql.query("products.csv", "price > 100 AND stock < 10")
|
|
213
|
+
|
|
214
|
+
# Folders with glob patterns
|
|
215
|
+
tql.query("logs/2024/*.jsonl", "status = 500", recursive=True)
|
|
216
|
+
|
|
217
|
+
# Streaming for large files (CLI)
|
|
218
|
+
# $ tql 'status = 200' large-file.jsonl  (shell command; processes without loading the file into memory)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### 🔄 **25+ Field Mutators**
|
|
222
|
+
Transform data inline before comparison:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
# String transformations
|
|
226
|
+
'email | lowercase | trim = "admin@example.com"'
|
|
227
|
+
'name | uppercase = "JOHN DOE"'
|
|
228
|
+
|
|
229
|
+
# Encoding/decoding
|
|
230
|
+
'data | b64decode | lowercase = "secret"'
|
|
231
|
+
'password | md5 = "5f4dcc3b5aa765d61d8327deb882cf99"'
|
|
232
|
+
|
|
233
|
+
# Network operations
|
|
234
|
+
'ip | is_private = true' # Check if IP is private (RFC 1918)
|
|
235
|
+
'domain | defang = "hxxp://evil[.]com"' # Security analysis
|
|
236
|
+
|
|
237
|
+
# DNS lookups
|
|
238
|
+
'hostname | nslookup contains "8.8.8.8"'
|
|
239
|
+
|
|
240
|
+
# GeoIP enrichment
|
|
241
|
+
'ip | geoip.country_name = "United States"'
|
|
242
|
+
|
|
243
|
+
# List operations
|
|
244
|
+
'scores | avg > 80'
|
|
245
|
+
'prices | sum between [100, 500]'
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### 📊 **Statistical Aggregations**
|
|
249
|
+
Analyze data with built-in stats functions:
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
# Simple aggregations
|
|
253
|
+
tql.query(data, '| stats count(), sum(revenue), avg(price)')
|
|
254
|
+
|
|
255
|
+
# Grouped analysis
|
|
256
|
+
tql.query(data, '| stats count() by status, region')
|
|
257
|
+
|
|
258
|
+
# Top N analysis
|
|
259
|
+
tql.query(data, '| stats sum(sales, top 10) by product')
|
|
260
|
+
|
|
261
|
+
# Combined filtering and stats
|
|
262
|
+
tql.query(data, 'region = "west" | stats avg(revenue) by category')
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### 🔧 **OpenSearch Integration**
|
|
266
|
+
Seamless OpenSearch/Elasticsearch integration:
|
|
267
|
+
|
|
268
|
+
- **Automatic DSL Translation**: TQL queries → OpenSearch Query DSL
|
|
269
|
+
- **Smart Field Mapping**: Handles keyword vs text fields automatically
|
|
270
|
+
- **Post-Processing**: Apply mutators that OpenSearch can't handle
|
|
271
|
+
- **Pagination Support**: Handle large result sets efficiently
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
# TQL handles field mapping automatically
|
|
275
|
+
mappings = {'user.email': {'type': 'text', 'fields': {'keyword': {'type': 'keyword'}}}}
|
|
276
|
+
tql = TQL(mappings)
|
|
277
|
+
|
|
278
|
+
# Exact match uses .keyword automatically
|
|
279
|
+
query = 'user.email = "admin@example.com"' # Uses user.email.keyword
|
|
280
|
+
|
|
281
|
+
# Mutators trigger post-processing when needed
|
|
282
|
+
query = 'user.email | lowercase contains "admin"' # Post-processes results
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## 📖 Syntax Guide
|
|
288
|
+
|
|
289
|
+
### Comparison Operators
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
# Equality
|
|
293
|
+
'status = "active"' # Exact match (alias: eq)
|
|
294
|
+
'status != "inactive"' # Not equal (alias: ne)
|
|
295
|
+
|
|
296
|
+
# Numeric comparisons
|
|
297
|
+
'age > 25' # Greater than
|
|
298
|
+
'age >= 18' # Greater or equal
|
|
299
|
+
'age < 65' # Less than
|
|
300
|
+
'age <= 100' # Less or equal
|
|
301
|
+
|
|
302
|
+
# String operations
|
|
303
|
+
'email contains "@example.com"' # Substring
|
|
304
|
+
'name startswith "John"' # Prefix
|
|
305
|
+
'filename endswith ".pdf"' # Suffix
|
|
306
|
+
'email regexp "^\\w+@\\w+\\.\\w+$"' # Regex
|
|
307
|
+
|
|
308
|
+
# Range and membership
|
|
309
|
+
'age between [18, 65]' # Inclusive range
|
|
310
|
+
'status in ["active", "pending"]' # Value in list
|
|
311
|
+
'priority range [1, 5]' # Alias for between
|
|
312
|
+
|
|
313
|
+
# Existence checks
|
|
314
|
+
'field exists' # Field is present
|
|
315
|
+
'field not exists' # Field is missing
|
|
316
|
+
'field is null' # Field is null
|
|
317
|
+
'field is not null' # Field is not null
|
|
318
|
+
|
|
319
|
+
# Network operations
|
|
320
|
+
'ip cidr "192.168.0.0/16"' # IP in CIDR range
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Logical Operators
|
|
324
|
+
|
|
325
|
+
```python
|
|
326
|
+
# AND (all conditions must be true)
|
|
327
|
+
'age > 25 AND city = "NYC"'
|
|
328
|
+
'status = "active" AND role in ["admin", "moderator"]'
|
|
329
|
+
|
|
330
|
+
# OR (at least one condition must be true)
|
|
331
|
+
'city = "NYC" OR city = "LA"'
|
|
332
|
+
'status = "admin" OR role = "superuser"'
|
|
333
|
+
|
|
334
|
+
# NOT (negates condition)
|
|
335
|
+
'NOT (age < 18)'
|
|
336
|
+
'NOT status = "deleted"'
|
|
337
|
+
|
|
338
|
+
# Complex expressions with parentheses
|
|
339
|
+
'(age > 25 AND city = "NYC") OR (status = "vip" AND score > 90)'
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
### Collection Operators
|
|
343
|
+
|
|
344
|
+
```python
|
|
345
|
+
# ANY - at least one array element matches
|
|
346
|
+
'ANY tags = "premium"'
|
|
347
|
+
'ANY user.roles = "admin"'
|
|
348
|
+
|
|
349
|
+
# ALL - every array element matches
|
|
350
|
+
'ALL scores >= 80'
|
|
351
|
+
'ALL status = "active"'
|
|
352
|
+
|
|
353
|
+
# NONE - no array elements match
|
|
354
|
+
'NONE flags = "spam"'
|
|
355
|
+
'NONE violations.severity = "critical"'
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Nested Field Access
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
# Dot notation for nested objects
|
|
362
|
+
'user.profile.email contains "@example.com"'
|
|
363
|
+
'metadata.tags.priority = "high"'
|
|
364
|
+
|
|
365
|
+
# Array indexing
|
|
366
|
+
'tags[0] = "urgent"'
|
|
367
|
+
'history[5].status = "completed"'
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Field Mutators Reference
|
|
371
|
+
|
|
372
|
+
#### **String Mutators**
|
|
373
|
+
- `lowercase`, `uppercase` - Case conversion
|
|
374
|
+
- `trim` - Remove whitespace
|
|
375
|
+
- `split(delimiter)` - Split string into array
|
|
376
|
+
- `length` - Get string length
|
|
377
|
+
- `replace(old, new)` - Replace substring
|
|
378
|
+
|
|
379
|
+
#### **Encoding Mutators**
|
|
380
|
+
- `b64encode`, `b64decode` - Base64 encoding/decoding
|
|
381
|
+
- `urldecode` - URL decode
|
|
382
|
+
- `hexencode`, `hexdecode` - Hex encoding/decoding
|
|
383
|
+
- `md5`, `sha256` - Cryptographic hashing
|
|
384
|
+
|
|
385
|
+
#### **Network/Security Mutators**
|
|
386
|
+
- `refang` - Convert defanged indicators (hxxp → http)
|
|
387
|
+
- `defang` - Defang URLs for safe display
|
|
388
|
+
- `is_private` - Check if IP is private (RFC 1918)
|
|
389
|
+
- `is_global` - Check if IP is globally routable
|
|
390
|
+
|
|
391
|
+
#### **DNS Mutators**
|
|
392
|
+
- `nslookup` - Resolve hostname to IP addresses
|
|
393
|
+
|
|
394
|
+
#### **GeoIP Mutators**
|
|
395
|
+
- `geoip` - Enrich IP with geolocation data
|
|
396
|
+
- Returns: `geo.country_name`, `geo.city_name`, `geo.location`, `geo.continent_code`, etc.
|
|
397
|
+
|
|
398
|
+
#### **List Mutators**
|
|
399
|
+
- `any`, `all` - Boolean aggregations
|
|
400
|
+
- `avg`, `average` - Calculate mean
|
|
401
|
+
- `sum` - Calculate sum
|
|
402
|
+
- `min`, `max` - Find min/max values
|
|
403
|
+
|
|
404
|
+
---
|
|
405
|
+
|
|
406
|
+
## 📊 Statistical Aggregations
|
|
407
|
+
|
|
408
|
+
TQL includes a powerful stats engine for data analysis:
|
|
409
|
+
|
|
410
|
+
### Available Functions
|
|
411
|
+
|
|
412
|
+
```python
|
|
413
|
+
# Counting
|
|
414
|
+
'| stats count()' # Count all records
|
|
415
|
+
'| stats count(field)' # Count non-null values
|
|
416
|
+
'| stats unique_count(field)' # Count distinct values
|
|
417
|
+
|
|
418
|
+
# Numeric aggregations
|
|
419
|
+
'| stats sum(revenue)' # Calculate sum
|
|
420
|
+
'| stats avg(price)' # Calculate average (aliases: average, mean)
|
|
421
|
+
'| stats min(age), max(age)' # Find min/max values
|
|
422
|
+
'| stats median(score)' # Calculate median
|
|
423
|
+
|
|
424
|
+
# Statistical measures
|
|
425
|
+
'| stats std(values)' # Standard deviation (alias: stdev)
|
|
426
|
+
'| stats percentile(score, 95)' # Calculate percentile
|
|
427
|
+
|
|
428
|
+
# Value extraction
|
|
429
|
+
'| stats values(category)' # Get unique values
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
### Grouping and Top N
|
|
433
|
+
|
|
434
|
+
```python
|
|
435
|
+
# Group by single field
|
|
436
|
+
'| stats count() by status'
|
|
437
|
+
|
|
438
|
+
# Group by multiple fields
|
|
439
|
+
'| stats sum(revenue) by region, category'
|
|
440
|
+
|
|
441
|
+
# Top N analysis
|
|
442
|
+
'| stats sum(sales, top 10) by product'
|
|
443
|
+
|
|
444
|
+
# Multiple aggregations
|
|
445
|
+
'| stats count(), sum(revenue), avg(price) by status'
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Combined Filtering and Stats
|
|
449
|
+
|
|
450
|
+
```python
|
|
451
|
+
# Filter then aggregate
|
|
452
|
+
'status = "success" AND region = "west" | stats avg(revenue) by category'
|
|
453
|
+
|
|
454
|
+
# Complex analytics
|
|
455
|
+
'timestamp > "2024-01-01" | stats count(), sum(bytes), avg(response_time) by endpoint'
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
## 🔌 OpenSearch Integration Guide
|
|
461
|
+
|
|
462
|
+
### Setup
|
|
463
|
+
|
|
464
|
+
```python
|
|
465
|
+
from opensearchpy import OpenSearch
|
|
466
|
+
from tql import TQL
|
|
467
|
+
|
|
468
|
+
# Create OpenSearch client
|
|
469
|
+
client = OpenSearch(
|
|
470
|
+
hosts=['localhost:9200'],
|
|
471
|
+
http_auth=('admin', 'admin'),
|
|
472
|
+
use_ssl=True,
|
|
473
|
+
verify_certs=False
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
# Get index mappings
|
|
477
|
+
response = client.indices.get_mapping(index='users-*')
|
|
478
|
+
mappings = response['users-2024']['mappings']['properties']
|
|
479
|
+
|
|
480
|
+
# Initialize TQL with mappings
|
|
481
|
+
tql = TQL(mappings)
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
### Query Translation
|
|
485
|
+
|
|
486
|
+
TQL automatically translates queries to OpenSearch DSL:
|
|
487
|
+
|
|
488
|
+
```python
|
|
489
|
+
# TQL Query
|
|
490
|
+
query = 'age > 25 AND status = "active"'
|
|
491
|
+
|
|
492
|
+
# Translates to OpenSearch DSL:
|
|
493
|
+
{
|
|
494
|
+
"query": {
|
|
495
|
+
"bool": {
|
|
496
|
+
"must": [
|
|
497
|
+
{"range": {"age": {"gt": 25}}},
|
|
498
|
+
{"term": {"status.keyword": "active"}}
|
|
499
|
+
]
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
# Execute seamlessly
|
|
505
|
+
results = tql.execute_opensearch(client, 'users-*', query)
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
### Field Mapping Intelligence
|
|
509
|
+
|
|
510
|
+
TQL automatically handles field types:
|
|
511
|
+
|
|
512
|
+
```python
|
|
513
|
+
# Text field with keyword subfield
|
|
514
|
+
mappings = {
|
|
515
|
+
'email': {
|
|
516
|
+
'type': 'text',
|
|
517
|
+
'fields': {
|
|
518
|
+
'keyword': {'type': 'keyword'}
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
# Exact match - uses .keyword automatically
|
|
524
|
+
'email = "admin@example.com"' # → term query on email.keyword
|
|
525
|
+
|
|
526
|
+
# Full-text search - uses text field
|
|
527
|
+
'email contains "example"' # → match query on email
|
|
528
|
+
|
|
529
|
+
# Case-insensitive - triggers post-processing
|
|
530
|
+
'email | lowercase = "admin@example.com"' # → fetch + filter
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
### Post-Processing
|
|
534
|
+
|
|
535
|
+
When a query uses operations that OpenSearch can't perform natively (such as mutators), TQL applies post-processing:
|
|
536
|
+
|
|
537
|
+
```python
|
|
538
|
+
# Mutators that require post-processing
|
|
539
|
+
'email | lowercase contains "admin"' # Post-process: case conversion
|
|
540
|
+
'data | b64decode contains "secret"' # Post-process: decode
|
|
541
|
+
'ip | geoip.country = "US"' # Post-process: GeoIP lookup
|
|
542
|
+
|
|
543
|
+
# TQL automatically:
|
|
544
|
+
# 1. Executes base query in OpenSearch
|
|
545
|
+
# 2. Fetches results
|
|
546
|
+
# 3. Applies mutators in Python
|
|
547
|
+
# 4. Filters results
|
|
548
|
+
# 5. Returns final matches
|
|
549
|
+
```
|
|
550
|
+
|
|
551
|
+
### Query Analysis
|
|
552
|
+
|
|
553
|
+
Analyze queries before execution to understand performance implications:
|
|
554
|
+
|
|
555
|
+
```python
|
|
556
|
+
# Analyze query health
|
|
557
|
+
analysis = tql.analyze_query('email | lowercase contains "admin"', context='opensearch')
|
|
558
|
+
|
|
559
|
+
print(f"Health: {analysis['health']['status']}") # 'fair' (post-processing)
|
|
560
|
+
print(f"Score: {analysis['health']['score']}") # 85
|
|
561
|
+
print(f"Post-processing: {analysis['mutator_health']['requires_post_processing']}") # True
|
|
562
|
+
|
|
563
|
+
# Recommendations for optimization
|
|
564
|
+
for issue in analysis['health']['issues']:
|
|
565
|
+
print(f"Issue: {issue['message']}")
|
|
566
|
+
print(f"Fix: {issue['recommendation']}")
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
---
|
|
570
|
+
|
|
571
|
+
## 📚 Documentation
|
|
572
|
+
|
|
573
|
+
Comprehensive documentation is available in the [docs/](./docs/) directory:
|
|
574
|
+
|
|
575
|
+
- **[Getting Started](./docs/user-guide/getting-started.md)** - Quick introduction and basic concepts
|
|
576
|
+
- **[Query Basics](./docs/user-guide/query-basics.md)** - Syntax fundamentals
|
|
577
|
+
- **[Operators Reference](./docs/user-guide/operators-reference.md)** - Complete operator guide
|
|
578
|
+
- **[Mutators Reference](./docs/user-guide/mutators-reference.md)** - All 25+ mutator functions
|
|
579
|
+
- **[OpenSearch Integration](./docs/opensearch/index.md)** - Complete OpenSearch guide
|
|
580
|
+
- **[Stats & Aggregations](./docs/stats/index.md)** - Statistical analysis guide
|
|
581
|
+
- **[API Reference](./docs/api-reference/index.md)** - Python API documentation
|
|
582
|
+
- **[Examples](./docs/cookbook/index.md)** - Real-world query examples
|
|
583
|
+
|
|
584
|
+
---
|
|
585
|
+
|
|
586
|
+
## ⚡ Performance
|
|
587
|
+
|
|
588
|
+
### Benchmarks
|
|
589
|
+
|
|
590
|
+
**Python Implementation:**
|
|
591
|
+
- In-memory queries: ~10,000 records/sec
|
|
592
|
+
- File parsing (JSON): ~5MB/sec
|
|
593
|
+
- OpenSearch queries: Limited by network latency
|
|
594
|
+
|
|
595
|
+
**Rust CLI (300x faster):**
|
|
596
|
+
- In-memory queries: ~3,000,000 records/sec
|
|
597
|
+
- File parsing (JSON): ~150MB/sec
|
|
598
|
+
- Large file streaming: Process 50MB in ~200ms
|
|
599
|
+
|
|
600
|
+
### Optimization Tips
|
|
601
|
+
|
|
602
|
+
```python
|
|
603
|
+
# Use CLI for large files (300x faster)
|
|
604
|
+
# $ tql 'status = 200' 50MB-file.jsonl  # ✓ Fast (Rust)
|
|
605
|
+
# $ python -m tql 'status = 200' 50MB-file.jsonl  # ✗ Slow (Python)
|
|
606
|
+
|
|
607
|
+
# Pre-compile queries for reuse
|
|
608
|
+
ast = tql.parse('age > 25 AND status = "active"')
|
|
609
|
+
results1 = tql.evaluate(ast, dataset1)
|
|
610
|
+
results2 = tql.evaluate(ast, dataset2)
|
|
611
|
+
|
|
612
|
+
# Use OpenSearch for large datasets
|
|
613
|
+
tql.execute_opensearch(client, 'huge-index-*', query) # Leverages OpenSearch's speed
|
|
614
|
+
|
|
615
|
+
# Minimize post-processing
|
|
616
|
+
'email.keyword = "admin@example.com"' # ✓ Fast (OpenSearch only)
|
|
617
|
+
'email | lowercase = "admin@example.com"' # ✗ Slower (post-processing)
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
---
|
|
621
|
+
|
|
622
|
+
## 🛠️ Development
|
|
623
|
+
|
|
624
|
+
### Installation
|
|
625
|
+
|
|
626
|
+
```bash
|
|
627
|
+
# Clone repository
|
|
628
|
+
git clone https://github.com/tellaro/tellaro-query-language.git
|
|
629
|
+
cd tellaro-query-language
|
|
630
|
+
|
|
631
|
+
# Install with Poetry (recommended)
|
|
632
|
+
poetry install
|
|
633
|
+
|
|
634
|
+
# Or with pip
|
|
635
|
+
pip install -e .
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
### Testing
|
|
639
|
+
|
|
640
|
+
```bash
|
|
641
|
+
# Run all tests
|
|
642
|
+
poetry run tests
|
|
643
|
+
|
|
644
|
+
# Run specific test file
|
|
645
|
+
poetry run pytest tests/test_parser.py -v
|
|
646
|
+
|
|
647
|
+
# Run with coverage
|
|
648
|
+
poetry run cov
|
|
649
|
+
|
|
650
|
+
# Run integration tests (requires OpenSearch)
|
|
651
|
+
cp .env.example .env # Configure OpenSearch connection
|
|
652
|
+
poetry run pytest tests/test_opensearch_integration.py -v
|
|
653
|
+
```
|
|
654
|
+
|
|
655
|
+
### Code Quality
|
|
656
|
+
|
|
657
|
+
```bash
|
|
658
|
+
# Format code
|
|
659
|
+
poetry run black .
|
|
660
|
+
|
|
661
|
+
# Type checking
|
|
662
|
+
poetry run mypy src
|
|
663
|
+
|
|
664
|
+
# Linting
|
|
665
|
+
poetry run pylint src
|
|
666
|
+
poetry run flake8 src
|
|
667
|
+
|
|
668
|
+
# Security checks
|
|
669
|
+
poetry run bandit -r src/
|
|
670
|
+
```
|
|
671
|
+
|
|
672
|
+
---
|
|
673
|
+
|
|
674
|
+
## 🗺️ Roadmap
|
|
675
|
+
|
|
676
|
+
### ✅ Implemented Features
|
|
677
|
+
- ✅ Core query engine with all operators
|
|
678
|
+
- ✅ 25+ field mutators (string, encoding, network, DNS, GeoIP, list)
|
|
679
|
+
- ✅ Statistical aggregations with grouping
|
|
680
|
+
- ✅ File support (JSON, JSONL, CSV)
|
|
681
|
+
- ✅ OpenSearch/Elasticsearch backend
|
|
682
|
+
- ✅ Intelligent post-processing
|
|
683
|
+
- ✅ Rust CLI for performance
|
|
684
|
+
- ✅ Mutator caching for GeoIP/DNS
|
|
685
|
+
- ✅ Query health analysis
|
|
686
|
+
|
|
687
|
+
### 🚧 In Progress
|
|
688
|
+
- 🚧 OpenSearch stats aggregation translation
|
|
689
|
+
- 🚧 Additional hash functions (SHA1, SHA512)
|
|
690
|
+
- 🚧 JSON parsing mutator
|
|
691
|
+
- 🚧 Timestamp conversion mutators
|
|
692
|
+
|
|
693
|
+
### 📋 Planned Features
|
|
694
|
+
- 📋 Elasticsearch backend support
|
|
695
|
+
- 📋 PostgreSQL/MySQL backends
|
|
696
|
+
- 📋 Query optimization engine
|
|
697
|
+
- 📋 Custom mutator plugins
|
|
698
|
+
- 📋 GraphQL-style field selection
|
|
699
|
+
- 📋 Parallel record evaluation
|
|
700
|
+
- 📋 Incremental file processing
|
|
701
|
+
|
|
702
|
+
### 🔮 Future Considerations
|
|
703
|
+
- 🔮 Time-series specific operators
|
|
704
|
+
- 🔮 Machine learning integration
|
|
705
|
+
- 🔮 Distributed query execution
|
|
706
|
+
- 🔮 Query caching layer
|
|
707
|
+
|
|
708
|
+
---
|
|
709
|
+
|
|
710
|
+
## 🤝 Contributing
|
|
711
|
+
|
|
712
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](./docs/developer/contributing.md) for guidelines.
|
|
713
|
+
|
|
714
|
+
### How to Contribute
|
|
715
|
+
|
|
716
|
+
1. Fork the repository
|
|
717
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
718
|
+
3. Make your changes
|
|
719
|
+
4. Run tests (`poetry run tests`)
|
|
720
|
+
5. Run linters (`poetry run lint`)
|
|
721
|
+
6. Commit changes (`git commit -m 'Add amazing feature'`)
|
|
722
|
+
7. Push to branch (`git push origin feature/amazing-feature`)
|
|
723
|
+
8. Open a Pull Request
|
|
724
|
+
|
|
725
|
+
---
|
|
726
|
+
|
|
727
|
+
## 📄 License
|
|
728
|
+
|
|
729
|
+
**Tellaro Query Language (TQL)** is **source-available** software with specific usage terms:
|
|
730
|
+
|
|
731
|
+
✅ **Permitted Uses:**
|
|
732
|
+
- Personal use (individual, non-commercial)
|
|
733
|
+
- Organizational use (within your company/organization)
|
|
734
|
+
- Integration into your applications and services
|
|
735
|
+
- Internal tools and automation
|
|
736
|
+
|
|
737
|
+
❌ **Restricted Uses:**
|
|
738
|
+
- Creating derivative query language products
|
|
739
|
+
- Commercial redistribution or resale
|
|
740
|
+
- Offering TQL-based commercial services to third parties
|
|
741
|
+
- Using source code to build competing products
|
|
742
|
+
|
|
743
|
+
For commercial licensing inquiries, contact: **support@tellaro.io**
|
|
744
|
+
|
|
745
|
+
See [LICENSE](LICENSE) for complete terms and conditions.
|
|
746
|
+
|
|
747
|
+
---
|
|
748
|
+
|
|
749
|
+
## 🔗 Related Projects
|
|
750
|
+
|
|
751
|
+
- **[TQL Rust](https://crates.io/crates/tellaro-query-language)** - High-performance Rust implementation
|
|
752
|
+
- **[Tellaro Platform](https://github.com/tellaro)** - Security operations platform using TQL
|
|
753
|
+
|
|
754
|
+
---
|
|
755
|
+
|
|
756
|
+
## 💬 Support
|
|
757
|
+
|
|
758
|
+
- **Issues**: [GitHub Issues](https://github.com/tellaro/tellaro-query-language/issues)
|
|
759
|
+
- **Documentation**: [Full Documentation](./docs/)
|
|
760
|
+
- **Examples**: [Cookbook](./docs/cookbook/)
|
|
761
|
+
- **Email**: support@tellaro.io
|
|
762
|
+
|
|
763
|
+
---
|
|
764
|
+
|
|
765
|
+
## 🌟 Quick Examples
|
|
766
|
+
|
|
767
|
+
### Security Log Analysis
|
|
768
|
+
|
|
769
|
+
```python
|
|
770
|
+
# Find high-severity events from private IPs
|
|
771
|
+
query = '''
|
|
772
|
+
source_ip | is_private = true AND
|
|
773
|
+
severity in ["high", "critical"] AND
|
|
774
|
+
(ANY tags = "malware" OR url | defang contains "suspicious")
|
|
775
|
+
'''
|
|
776
|
+
results = tql.query("security-logs.jsonl", query)
|
|
777
|
+
```
|
|
778
|
+
|
|
779
|
+
### E-commerce Analytics
|
|
780
|
+
|
|
781
|
+
```python
|
|
782
|
+
# Analyze sales by region for premium products
|
|
783
|
+
query = '''
|
|
784
|
+
product_tier = "premium" AND
|
|
785
|
+
order_date > "2024-01-01" |
|
|
786
|
+
stats sum(revenue), avg(order_value), count() by region
|
|
787
|
+
'''
|
|
788
|
+
results = tql.query("sales.json", query)
|
|
789
|
+
```
|
|
790
|
+
|
|
791
|
+
### System Monitoring
|
|
792
|
+
|
|
793
|
+
```python
|
|
794
|
+
# Find servers with high resource usage
|
|
795
|
+
query = '''
|
|
796
|
+
hostname | nslookup exists AND
|
|
797
|
+
(cpu_usage > 80 OR memory_usage > 90) AND
|
|
798
|
+
status = "production"
|
|
799
|
+
'''
|
|
800
|
+
results = tql.execute_opensearch(client, "metrics-*", query)
|
|
801
|
+
```
|
|
802
|
+
|
|
803
|
+
---
|
|
804
|
+
|
|
805
|
+
**Made with ❤️ by the Tellaro Team**
|
|
806
|
+
|