tellaro-query-language 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tellaro-query-language
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: A flexible, human-friendly query language for searching and filtering structured data
5
5
  Home-page: https://github.com/tellaro/tellaro-query-language
6
6
  License: MIT
@@ -4,12 +4,12 @@ tql/cache/__init__.py,sha256=GIzIEMZUZEYJj72sAhuVLEG-OJEKUG2srUWNM3Ix-T8,213
4
4
  tql/cache/base.py,sha256=0b-8uyh3JltayGmXQI45snTqsM5sQu9u0KcNvZIRa-I,687
5
5
  tql/cache/memory.py,sha256=ibcmQSAxNvqCy6DksbU7gLu6UArYp1u3fW-oLubxtV0,2056
6
6
  tql/cache/redis.py,sha256=ZU_IsVDvpSYpNvPfnZ4iulJDODpEGx3c4dkXLzPzPVc,2309
7
- tql/core.py,sha256=zAkTStN_3logY9ARABDZNsQJfXV5rITiiTKoX5bo274,40859
7
+ tql/core.py,sha256=zIihZijiwqejG78EIRikTtaDvzMgcy2cwBCL3-HwZy8,40797
8
8
  tql/core_components/README.md,sha256=Rm7w4UHdQ0vPBEFybE5b62IOvSA5Nzq2GRvtBHOapmc,3068
9
9
  tql/core_components/__init__.py,sha256=v8BBybPlqV7dkVY9mw1mblvqyAFJZ7Pf_bEc-jAL7FI,643
10
10
  tql/core_components/file_operations.py,sha256=Jr0kkxz_OP2KHOAsIr7KMtYe_lbu8LuBUySt2LQbjJw,3925
11
- tql/core_components/opensearch_operations.py,sha256=x0GPdQfMX-NYCzzAlJCbtzYFa6HmaKxGEVl2JdAhKJY,35447
12
- tql/core_components/stats_operations.py,sha256=9WSEMdAvtZA6deckGwyetPAtnDODFqlnrhvlBbNkBcs,7469
11
+ tql/core_components/opensearch_operations.py,sha256=HIiSosL2uek88H6aPkX_QDnlgV8bT1fbotG652Z2vdE,37900
12
+ tql/core_components/stats_operations.py,sha256=zAfDhVOFFPMrRIMw6Qtjxbobbdi7ao_HuHBCcVc3BGY,7579
13
13
  tql/core_components/validation_operations.py,sha256=_VPXh0HABBjsXF99jFT7B6-5QAPsADOCy6poinGrxeE,22454
14
14
  tql/evaluator.py,sha256=YdgS1vuxUEPAHhUsZey-Y4NydeS8CTYOy_O8R5_K8cE,15421
15
15
  tql/evaluator_components/README.md,sha256=c59yf2au34yPhrru7JWgGop_ORteB6w5vfMhsac8j3k,3882
@@ -36,21 +36,21 @@ tql/opensearch_components/field_mapping.py,sha256=N4r7VkzNeXjIhNDt2cfnd1LbkvaS_9
36
36
  tql/opensearch_components/lucene_converter.py,sha256=ZbupWZ2smGhWfE9cSIrNo7MZVY35l2t86HYM-bd7nKw,12436
37
37
  tql/opensearch_components/query_converter.py,sha256=9Nkhehb8X7jIQcyRaHCa5WCKWtovdRGKuSdDsBulrG0,36722
38
38
  tql/opensearch_mappings.py,sha256=zJCCdMrxK7mswrkxd5LiOhunQ9GIJNZdhktVoGXgVgk,11529
39
- tql/opensearch_stats.py,sha256=svcE23lD1gDe1iYGlC388WE8X3x9C8tjfBylUVy2Uqo,17216
39
+ tql/opensearch_stats.py,sha256=h-hA2EZ-sc4S1zxr7jaInmonfrgTXoGBbb9sOYurdFE,17823
40
40
  tql/parser.py,sha256=dnjgc-sDVihe-VIVPT_SRULeng4OWaLtkbM23dluT6M,75532
41
41
  tql/parser_components/README.md,sha256=lvQX72ckq2zyotGs8QIHHCIFqaA7bOHwkP44wU8Zoiw,2322
42
42
  tql/parser_components/__init__.py,sha256=zBwHBMPJyHSBbaOojf6qTrJYjJg5A6tPUE8nHFdRiQs,521
43
43
  tql/parser_components/ast_builder.py,sha256=-pbcYhZNoRm0AnjmJRAAlXLCAwHfauchTpX_6KO0plE,6793
44
44
  tql/parser_components/error_analyzer.py,sha256=qlCD9vKyW73aeKQYI33P1OjIWSJ3LPd08wuN9cis2fU,4012
45
45
  tql/parser_components/field_extractor.py,sha256=TumeuUo2c5gPYVbTPsmU43C3TJFC8chAAWERu5v_Q3c,4182
46
- tql/parser_components/grammar.py,sha256=WtmgANV49Jx1uHNBeiyElpXqKy3feQOWj9aYdxGn3Ww,19182
46
+ tql/parser_components/grammar.py,sha256=lSvjABvEBaH29-ad-_UGD4WmofdNwC_pO2OKQJ_It-U,19309
47
47
  tql/post_processor.py,sha256=-vA2wgbuLij2FVnj5I9HDHtw5bKj9Cu3EE9mtoeSWk8,28859
48
48
  tql/scripts.py,sha256=VOr5vCjIvKlW36kwvJx7JGFIRM16IJZlbJcWlBexBtk,3728
49
- tql/stats_evaluator.py,sha256=GtmygLdPPMhAMR5bN1h69cQsNBYqg-jQl6bN5rwEx6Q,15692
49
+ tql/stats_evaluator.py,sha256=lOEbICFuP0krZZqEjREz37xlpm35_P6eRgkHVgJLNI4,15703
50
50
  tql/stats_transformer.py,sha256=MT-4rDWZSySgn4Fuq9H0c-mvwFYLM6FqWpPv2rHX-rE,7588
51
51
  tql/validators.py,sha256=e9MlX-zQ_O3M8YP8vXyMjKU8iiJMTh6mMK0iv0_4gTY,3771
52
- tellaro_query_language-0.1.6.dist-info/LICENSE,sha256=zRhQ85LnW55fWgAjQctckwQ67DX5Jmt64lq343ThZFU,1063
53
- tellaro_query_language-0.1.6.dist-info/METADATA,sha256=5IbaJTGKJLQvYnXR-670n5rRISPIOmfekhqDie1V1MQ,15109
54
- tellaro_query_language-0.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
- tellaro_query_language-0.1.6.dist-info/entry_points.txt,sha256=H43APfGBMsZkKsUCnFTaqprQPW-Kce2yz2qsBL3dZrw,164
56
- tellaro_query_language-0.1.6.dist-info/RECORD,,
52
+ tellaro_query_language-0.1.8.dist-info/LICENSE,sha256=zRhQ85LnW55fWgAjQctckwQ67DX5Jmt64lq343ThZFU,1063
53
+ tellaro_query_language-0.1.8.dist-info/METADATA,sha256=7dSseBpILkxyeAW0B7QQ3psZORRfkM0Cai7l5uiQGCU,15109
54
+ tellaro_query_language-0.1.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
+ tellaro_query_language-0.1.8.dist-info/entry_points.txt,sha256=H43APfGBMsZkKsUCnFTaqprQPW-Kce2yz2qsBL3dZrw,164
56
+ tellaro_query_language-0.1.8.dist-info/RECORD,,
tql/core.py CHANGED
@@ -167,7 +167,7 @@ class TQL:
167
167
  return f"NOT {operand}"
168
168
  return str(ast)
169
169
 
170
- def query(
170
+ def query( # noqa: C901
171
171
  self, data: Union[List[Dict], str], query: str, size: int = 10, save_enrichment: bool = False
172
172
  ) -> Dict[str, Any]: # noqa: C901
173
173
  """Execute a TQL query against data and return results in execute_opensearch format.
@@ -470,8 +470,7 @@ class TQL:
470
470
  size: Number of results to return (default: 500)
471
471
  from_: Starting offset for pagination (default: 0)
472
472
  timestamp_field: Field name for timestamp filtering (default: "@timestamp")
473
- time_range: Optional time range dict with 'gte' and/or 'lte' keys.
474
- If None, defaults to 'gte': 'now-15m', 'lte': 'now'
473
+ time_range: Optional time range dict with 'gte' and/or 'lte' keys
475
474
  scan_all: If True, use scroll API to retrieve all matching documents
476
475
  scroll_size: Size per scroll when scan_all=True (default: 1000)
477
476
  scroll_timeout: Scroll timeout when scan_all=True (default: "5m")
tql/core_components/opensearch_operations.py CHANGED
@@ -311,10 +311,6 @@ class OpenSearchOperations:
311
311
  search_body = opensearch_query.copy()
312
312
 
313
313
  # Handle time range filtering
314
- if time_range is None:
315
- # Default time range: last 15 minutes
316
- time_range = {"gte": "now-15m", "lte": "now"}
317
-
318
314
  # Add time range filter to the query
319
315
  if time_range:
320
316
  base_query = search_body.get("query", {})
@@ -441,49 +437,93 @@ class OpenSearchOperations:
441
437
  fields = [agg.get("field") for agg in aggregations]
442
438
 
443
439
  stats_results = {
444
- "type": "stats",
440
+ "type": "stats_grouped",
445
441
  "operation": operations[0] if len(operations) == 1 else operations,
446
442
  "field": fields[0] if len(fields) == 1 else fields,
447
- "values": buckets, # Array of buckets for grouped results
443
+ "results": buckets, # Array of buckets for grouped results
448
444
  "group_by": group_by_fields,
449
445
  }
450
446
  else:
451
447
  # Simple aggregations without grouping
452
448
  if aggregations:
453
- first_agg = aggregations[0]
454
- func = first_agg.get("function", "")
455
- field = first_agg.get("field", "*")
456
-
457
- # Get the aggregation result
458
- # The alias is typically func_field_0 for the first aggregation
459
- alias = first_agg.get("alias") or f"{func}_{field}_0"
460
- agg_result = aggs_response.get(alias, {})
461
-
462
- # Extract the value based on aggregation type
463
- if func == "count":
464
- value = agg_result.get("value", 0)
465
- elif func in ["sum", "min", "max", "avg", "average"]:
466
- value = agg_result.get("value", 0)
467
- elif func == "unique_count":
468
- value = agg_result.get("value", 0)
469
- elif func in ["percentile", "percentiles", "p", "pct"]:
470
- # Percentiles return a values dict
471
- values_dict = agg_result.get("values", {})
472
- # For a single percentile, extract the value
473
- if len(values_dict) == 1:
474
- value = list(values_dict.values())[0]
449
+ if len(aggregations) == 1:
450
+ # Single aggregation
451
+ first_agg = aggregations[0]
452
+ func = first_agg.get("function", "")
453
+ field = first_agg.get("field", "*")
454
+
455
+ # Get the aggregation result
456
+ # The alias is typically func_field_0 for the first aggregation
457
+ alias = first_agg.get("alias") or f"{func}_{field}_0"
458
+ agg_result = aggs_response.get(alias, {})
459
+
460
+ # Extract the value based on aggregation type
461
+ if func == "count":
462
+ value = agg_result.get("value", 0)
463
+ elif func in ["sum", "min", "max", "avg", "average"]:
464
+ value = agg_result.get("value", 0)
465
+ elif func == "unique_count":
466
+ value = agg_result.get("value", 0)
467
+ elif func in ["percentile", "percentiles", "p", "pct"]:
468
+ # Percentiles return a values dict
469
+ values_dict = agg_result.get("values", {})
470
+ # For a single percentile, extract the value
471
+ if len(values_dict) == 1:
472
+ value = list(values_dict.values())[0]
473
+ else:
474
+ value = values_dict
475
+ elif func in ["values", "unique"]:
476
+ # Extract buckets from terms aggregation
477
+ buckets = agg_result.get("buckets", [])
478
+ value = [bucket["key"] for bucket in buckets]
475
479
  else:
476
- value = values_dict
480
+ value = agg_result
481
+
482
+ stats_results = {
483
+ "type": "stats",
484
+ "operation": func,
485
+ "field": field,
486
+ "values": value,
487
+ "group_by": [],
488
+ }
477
489
  else:
478
- value = agg_result
479
-
480
- stats_results = {
481
- "type": "stats",
482
- "operation": func,
483
- "field": field,
484
- "values": value,
485
- "group_by": [],
486
- }
490
+ # Multiple aggregations
491
+ agg_results = {}
492
+ for i, agg in enumerate(aggregations):
493
+ func = agg.get("function", "")
494
+ field = agg.get("field", "*")
495
+ alias = agg.get("alias") or f"{func}_{field}_{i}"
496
+ agg_result = aggs_response.get(alias, {})
497
+
498
+ # Extract the value based on aggregation type
499
+ if func == "count":
500
+ value = agg_result.get("value", 0)
501
+ elif func in ["sum", "min", "max", "avg", "average"]:
502
+ value = agg_result.get("value", 0)
503
+ elif func == "unique_count":
504
+ value = agg_result.get("value", 0)
505
+ elif func in ["percentile", "percentiles", "p", "pct"]:
506
+ # Percentiles return a values dict
507
+ values_dict = agg_result.get("values", {})
508
+ # For a single percentile, extract the value
509
+ if len(values_dict) == 1:
510
+ value = list(values_dict.values())[0]
511
+ else:
512
+ value = values_dict
513
+ elif func in ["values", "unique"]:
514
+ # Extract buckets from terms aggregation
515
+ buckets = agg_result.get("buckets", [])
516
+ value = [bucket["key"] for bucket in buckets]
517
+ else:
518
+ value = agg_result
519
+
520
+ key = agg.get("alias") or f"{func}_{field}"
521
+ agg_results[key] = value
522
+
523
+ stats_results = {
524
+ "type": "stats",
525
+ "results": agg_results,
526
+ }
487
527
  else:
488
528
  stats_results = {"type": "stats", "operation": "unknown", "field": "*", "values": 0, "group_by": []}
489
529
 
tql/core_components/stats_operations.py CHANGED
@@ -133,7 +133,8 @@ class StatsOperations:
133
133
  """
134
134
  # Parse the query
135
135
  try:
136
- if not query.strip().startswith("| stats") and "|" not in query:
136
+ # Don't add prefix if query already starts with "stats"
137
+ if not query.strip().startswith("| stats") and not query.strip().startswith("stats") and "|" not in query:
137
138
  query = "| stats " + query.strip()
138
139
 
139
140
  ast = self.parser.parse(query)
tql/opensearch_stats.py CHANGED
@@ -33,6 +33,9 @@ class OpenSearchStatsTranslator:
33
33
  "pct_rank": "percentile_ranks",
34
34
  "pct_ranks": "percentile_ranks",
35
35
  "zscore": None, # Requires post-processing
36
+ "values": "terms", # Return unique values
37
+ "unique": "terms", # Alias for values
38
+ "cardinality": "cardinality", # Count of unique values
36
39
  }
37
40
 
38
41
  # Aggregations that require numeric fields
@@ -146,6 +149,9 @@ class OpenSearchStatsTranslator:
146
149
  if not rank_values:
147
150
  raise TQLError("percentile_rank requires at least one value")
148
151
  aggs_dsl[alias] = {"percentile_ranks": {"field": field, "values": rank_values}}
152
+ elif func in ["values", "unique"]:
153
+ # Terms aggregation to get unique values
154
+ aggs_dsl[alias] = {"terms": {"field": field, "size": 10000}} # Large size to get all values
149
155
  else:
150
156
  # Direct mapping
151
157
  aggs_dsl[alias] = {os_agg_type: {"field": field}}
@@ -398,7 +404,7 @@ class OpenSearchStatsTranslator:
398
404
 
399
405
  def _extract_aggregation_value( # noqa: C901
400
406
  self, agg_result: Dict[str, Any], function: str
401
- ) -> Union[int, float, Dict[str, Any], None]:
407
+ ) -> Union[int, float, Dict[str, Any], List[Any], None]:
402
408
  """Extract value from OpenSearch aggregation result.
403
409
 
404
410
  Args:
@@ -447,5 +453,9 @@ class OpenSearchStatsTranslator:
447
453
  for k, v in values.items():
448
454
  result[f"rank_{k}"] = v
449
455
  return result
456
+ elif function in ["values", "unique"]:
457
+ # Extract buckets from terms aggregation
458
+ buckets = agg_result.get("buckets", [])
459
+ return [bucket["key"] for bucket in buckets]
450
460
  else:
451
461
  return None
tql/parser_components/grammar.py CHANGED
@@ -380,8 +380,9 @@ class TQLGrammar:
380
380
  caseless=True,
381
381
  )
382
382
 
383
- # Special case for count(*)
383
+ # Special case for count(*) and count()
384
384
  self.count_all = CaselessKeyword("count") + Suppress("(") + Suppress("*") + Suppress(")")
385
+ self.count_empty = CaselessKeyword("count") + Suppress("(") + Suppress(")")
385
386
 
386
387
  # Aggregation function with field
387
388
  self.agg_function = (
@@ -395,6 +396,7 @@ class TQLGrammar:
395
396
  + Suppress(")")
396
397
  )
397
398
  | self.count_all
399
+ | self.count_empty
398
400
  )
399
401
 
400
402
  # Support for aliasing: sum(revenue) as total_revenue
tql/stats_evaluator.py CHANGED
@@ -198,7 +198,7 @@ class TQLStatsEvaluator:
198
198
 
199
199
  def _calculate_aggregation( # noqa: C901
200
200
  self, records: List[Dict[str, Any]], agg_spec: Dict[str, Any]
201
- ) -> Union[int, float, Dict[str, Any], None]:
201
+ ) -> Union[int, float, Dict[str, Any], List[Any], None]:
202
202
  """Calculate a single aggregation value.
203
203
 
204
204
  Args: