tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
tql/parser.py CHANGED
@@ -24,6 +24,9 @@ class TQLParser:
     evaluated against data or converted to backend-specific query formats.
     """

+    # Maximum query depth to prevent stack overflow and DoS attacks
+    MAX_QUERY_DEPTH = 50
+
     def __init__(self):
         """Initialize the parser with TQL grammar."""
         self.grammar = TQLGrammar()
@@ -53,7 +56,8 @@ class TQLParser:
             parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)

             # Convert to our AST format
-            return self._build_ast(parsed_result.asList()[0])
+            # Start depth counting at 0 from parse() entry point
+            return self._build_ast(parsed_result.asList()[0], depth=0)

         except ParseException as e:
             # Extract position and context from pyparsing exception
@@ -114,15 +118,29 @@ class TQLParser:
         # Extract fields using the field extractor
         return self.field_extractor.extract_fields(ast)

-    def _build_ast(self, parsed: Any) -> Dict[str, Any]:  # noqa: C901
+    def _build_ast(self, parsed: Any, depth: int = 0) -> Dict[str, Any]:  # noqa: C901
         """Build AST from parsed pyparsing result.

         Args:
             parsed: The parsed result from pyparsing
+            depth: Current recursion depth (for DoS prevention)

         Returns:
             Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
         """
+        # Check depth limit to prevent stack overflow and DoS attacks
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"],
+            )
+
         if isinstance(parsed, list):
             if len(parsed) == 1:
                 # Single item, check if it's a field with is_private/is_global mutator
@@ -162,7 +180,7 @@ class TQLParser:
                     }
                     return result
                 # Single item, unwrap it
-                return self._build_ast(parsed[0])
+                return self._build_ast(parsed[0], depth + 1)
             elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
                 # This is a stats expression without filter (applies to all records)
                 return self._build_stats_ast(parsed)
@@ -210,7 +228,7 @@ class TQLParser:
             # Check for NOT operator first (before field | mutator check)
             elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
                 # Unary logical operator (NOT or !)
-                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
+                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second, depth + 1)}

             # Check for field | mutator without operator
             # This happens when we have a field with mutator(s) as the last element
@@ -267,12 +285,16 @@ class TQLParser:
                    # This is filter | stats
                    return {
                        "type": "query_with_stats",
-                        "filter": self._build_ast(first),
+                        "filter": self._build_ast(first, depth + 1),
                        "stats": self._build_stats_ast(second),
                    }
                else:
                    # Fallback to treating as unary logical operator
-                    return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
+                    return {
+                        "type": "unary_op",
+                        "operator": first.lower(),
+                        "operand": self._build_ast(second, depth + 1),
+                    }
            elif len(parsed) >= 3:
                # Check if this is a field with multiple mutators
                if isinstance(parsed[0], str) and all(
@@ -419,7 +441,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -497,7 +519,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -638,7 +660,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -715,7 +737,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -725,7 +747,7 @@ class TQLParser:
                    return result
                else:
                    # This is a chained operation, not a between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)

            elif len(parsed) == 6:
                # Check for "field not between value1 and value2" or "field ! between value1 and value2"
@@ -814,7 +836,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -824,7 +846,7 @@ class TQLParser:
                    return result
                else:
                    # This is a chained operation, not a not_between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)

            elif len(parsed) == 3:
                # Binary operation or comparison (including negated unary operators like "field not exists")
@@ -869,7 +891,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -923,7 +945,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -937,8 +959,8 @@ class TQLParser:
            return {
                "type": "logical_op",
                "operator": operator.lower(),
-                "left": self._build_ast(left),
-                "right": self._build_ast(right),
+                "left": self._build_ast(left, depth + 1),
+                "right": self._build_ast(right, depth + 1),
            }
        elif (
            isinstance(operator, str)
@@ -1189,7 +1211,7 @@ class TQLParser:
            # Handle longer lists (chained operations)
            # This happens with infixNotation for multiple AND/OR operations
            # The structure will be flattened, so we need to reconstruct the tree
-            return self._build_chained_ast(parsed)
+            return self._build_chained_ast(parsed, depth + 1)
        else:
            # Single value - should already be a proper AST node
            if isinstance(parsed, dict):
@@ -1201,21 +1223,34 @@ class TQLParser:
        # This should be unreachable, but helps mypy understand all paths return
        raise AssertionError("Unreachable code in _build_ast")

-    def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
+    def _build_chained_ast(self, parsed_list: List[Any], depth: int = 0) -> Dict[str, Any]:
        """Build AST from chained operations (e.g., A AND B AND C).

        Args:
            parsed_list: List of alternating operands and operators
+            depth: Current recursion depth (for DoS prevention)

        Returns:
            Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
        """
+        # Check depth limit to prevent stack overflow
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"],
+            )
        if len(parsed_list) < 3:
            # Not enough elements for a chained operation
            return {"type": "unknown", "value": parsed_list}

        # Start with the first operand
-        result = self._build_ast(parsed_list[0])
+        result = self._build_ast(parsed_list[0], depth + 1)

        # Process pairs of (operator, operand)
        i = 1
@@ -1228,7 +1263,7 @@ class TQLParser:
                    "type": "logical_op",
                    "operator": operator.lower(),
                    "left": result,
-                    "right": self._build_ast(operand),
+                    "right": self._build_ast(operand, depth + 1),
                }
            else:
                # This shouldn't happen in a well-formed chained expression
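
The parser changes above thread a depth counter through every recursive call in _build_ast and _build_chained_ast, so a pathologically nested query fails with a TQLSyntaxError instead of exhausting the interpreter's call stack. The sketch below is a minimal, standalone illustration of the same guard pattern; the function and exception names are illustrative only and are not part of the TQL package.

# Minimal sketch of the depth-guard pattern (illustrative names, not TQL's API).
MAX_QUERY_DEPTH = 50


class QueryTooDeepError(ValueError):
    """Raised when a parsed expression nests more deeply than allowed."""


def build_ast(parsed, depth=0):
    # Every recursive call passes depth + 1, so the counter tracks real nesting.
    if depth > MAX_QUERY_DEPTH:
        raise QueryTooDeepError(
            f"Query depth exceeds maximum allowed depth of {MAX_QUERY_DEPTH}"
        )
    if isinstance(parsed, list):
        return {"type": "group", "children": [build_ast(p, depth + 1) for p in parsed]}
    return {"type": "value", "value": parsed}


# A query nested 60 levels deep now fails fast with a clear error
# instead of raising RecursionError deep inside the parser.
nested = "x"
for _ in range(60):
    nested = [nested]
try:
    build_ast(nested)
except QueryTooDeepError as exc:
    print(exc)

A fixed constant such as 50 leaves realistic queries untouched while bounding both recursion depth and the work done per query.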
tql/post_processor.py CHANGED
@@ -73,12 +73,17 @@ class QueryPostProcessor:
                field_accessor.get_field_value, evaluator._evaluate_node
            )

+            # Safe access with validation
+            # conditions is guaranteed to exist by the if check above
+            conditions = requirement.metadata["conditions"]
+            nslookup_params = requirement.metadata.get("nslookup_params", {})
+
            # Build node for evaluation
            node = {
                "type": "nslookup_expr",
                "field": requirement.field_name,
-                "conditions": requirement.metadata["conditions"],
-                "nslookup_params": requirement.metadata.get("nslookup_params", {}),
+                "conditions": conditions,
+                "nslookup_params": nslookup_params,
            }

            # Evaluate the nslookup expression
@@ -91,6 +96,7 @@ class QueryPostProcessor:
            and requirement.metadata
            and "conditions" in requirement.metadata
        ):
+            # Safe access - conditions is guaranteed to exist by the if check
            conditions = requirement.metadata["conditions"]
            if conditions:
                # Get the geo data that was enriched
@@ -129,12 +135,15 @@ class QueryPostProcessor:
                # Check if this is an array operator with comparison
                if "comparison_operator" in requirement.metadata:
                    # This is a special case: field | any/all/none eq value
-                    array_operator = requirement.metadata["operator"]
-                    comparison_operator = requirement.metadata["comparison_operator"]
+                    # Safe access - both keys are guaranteed to exist by the if checks
+                    array_operator = requirement.metadata["operator"]  # exists from line 128 check
+                    comparison_operator = requirement.metadata[
+                        "comparison_operator"
+                    ]  # exists from line 135 check
                    value = requirement.metadata.get("value")

-                    # Get the field value
-                    temp_field_name = f"__{requirement.field_name}_mutated__"
+                    # Get the field value with proper nested field handling
+                    temp_field_name = self._get_mutated_field_name(requirement.field_name)
                    field_value = self._get_field_value(result, temp_field_name)
                    if field_value is None:
                        # No mutated value, get original
@@ -148,18 +157,21 @@ class QueryPostProcessor:
                            break
                else:
                    # Regular operator check
+                    # Safe access - operator is guaranteed to exist by the if check at line 134
                    operator = requirement.metadata["operator"]
                    value = requirement.metadata.get("value")

                    # Check if this was originally a different operator (for type-changing mutators)
                    if requirement.metadata.get("_original_comparison"):
+                        # Safe access - validated by .get() check above
                        original = requirement.metadata["_original_comparison"]
-                        operator = original["operator"]
+                        # Validate that operator exists in original
+                        operator = original.get("operator", operator)
                        value = original.get("value", value)

                    # Get the field value - either mutated or original
                    # First check for mutated value in temp field
-                    temp_field_name = f"__{requirement.field_name}_mutated__"
+                    temp_field_name = self._get_mutated_field_name(requirement.field_name)
                    field_value = self._get_field_value(result, temp_field_name)
                    if field_value is None:
                        # No mutated value, get original
@@ -373,7 +385,7 @@ class QueryPostProcessor:
            return False

        # Get the field value
-        temp_field_name = f"__{field_name}_mutated__"
+        temp_field_name = self._get_mutated_field_name(field_name)
        field_value = self._get_field_value(result, temp_field_name)
        if field_value is None:
            # No mutated value, get original
@@ -703,7 +715,7 @@ class QueryPostProcessor:
                self._set_field_value(result, requirement.field_name, mutated_value)
            elif not is_geo_enrichment:
                # For type-changing mutators with filtering operations, store in temp field
-                temp_field_name = f"__{requirement.field_name}_mutated__"
+                temp_field_name = self._get_mutated_field_name(requirement.field_name)
                self._set_field_value(result, temp_field_name, mutated_value)

        # Check if we have any enrichment mutators
@@ -994,6 +1006,25 @@ class QueryPostProcessor:

        return current

+    def _get_mutated_field_name(self, field_name: str) -> str:
+        """Generate the correct mutated field name for nested or flat fields.
+
+        Args:
+            field_name: The original field name (e.g., "user.address.zip" or "status")
+
+        Returns:
+            Mutated field name with proper nesting:
+            - "user.address.zip" -> "user.address.__zip_mutated__"
+            - "status" -> "__status_mutated__"
+        """
+        field_parts = field_name.split(".")
+        if len(field_parts) > 1:
+            # For nested fields, only mutate the leaf field name
+            return ".".join(field_parts[:-1] + [f"__{field_parts[-1]}_mutated__"])
+        else:
+            # For flat fields, mutate the entire name
+            return f"__{field_name}_mutated__"
+
    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

@@ -1140,7 +1171,9 @@ class PostProcessingStats:
class PostProcessingError(Exception):
    """Exception raised during post-processing operations."""

-    def __init__(self, message: str, field_name: Optional[str] = None, mutator_name: Optional[str] = None):
+    def __init__(  # noqa: B042
+        self, message: str, field_name: Optional[str] = None, mutator_name: Optional[str] = None
+    ):
        """Initialize post-processing error.

        Args:
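
The main behavioral fix in this file is _get_mutated_field_name: the temporary field that holds a mutated value is now nested alongside the original leaf field instead of being a single flat key that contains dots. The sketch below duplicates the naming logic outside the class purely for illustration, to show why the old formatting broke nested lookups.

# Standalone copy of the naming logic, for illustration only.
def get_mutated_field_name(field_name: str) -> str:
    parts = field_name.split(".")
    if len(parts) > 1:
        # Only the leaf segment is renamed, so dotted traversal still works.
        return ".".join(parts[:-1] + [f"__{parts[-1]}_mutated__"])
    return f"__{field_name}_mutated__"


assert get_mutated_field_name("status") == "__status_mutated__"
assert get_mutated_field_name("user.address.zip") == "user.address.__zip_mutated__"

# The previous f"__{field_name}_mutated__" formatting produced
# "__user.address.zip_mutated__" for nested fields; a dot-splitting field
# accessor would then look for a top-level key named "__user" and never
# find the mutated value.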
tql/scripts.py CHANGED
@@ -1,5 +1,6 @@
 """ Runs pytest, coverage, linters, and security checks. """

+import os
 import subprocess  # nosec


@@ -29,8 +30,13 @@ def run_coverage():
     Run coverage against all files in the `src` directory
     and output an XML report to `reports/coverage.xml`.
     """
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     # 1. Run pytest with coverage, using `src` as the source
-    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec
+    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True, env=env)  # nosec

     # 2. Generate an XML coverage report in `reports/coverage.xml`
     subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec
@@ -40,7 +46,12 @@ def run_coverage():

 def run_tests():
     """Runs pytests against tests in the `tests` directory."""
-    subprocess.run(["pytest", "tests"], check=True)  # nosec
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
+    subprocess.run(["pytest", "tests"], check=True, env=env)  # nosec


 def run_lint_all():
@@ -84,6 +95,11 @@ def run_lint():

 def run_badge():
     """Generate a badge using genbadge."""
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     subprocess.run(  # nosec
         [
             "coverage",
@@ -94,6 +110,7 @@ def run_badge():
             "--junit-xml=reports/junit/junit.xml",
         ],
         check=True,
+        env=env,
     )

     # 2. Generate an XML coverage report in `reports/coverage.xml`
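
The script changes copy the current environment and default INTEGRATION_TEST_ENABLE to "false" before every subprocess pytest invocation, so run_tests, run_coverage, and run_badge skip integration suites unless the caller exports the variable themselves. How the test suite consumes the flag is not shown in this diff; one plausible consumer, sketched here with an assumed test name, would gate integration tests behind a pytest skip marker.

# Hypothetical consumer of INTEGRATION_TEST_ENABLE (not shown in this diff).
import os

import pytest

RUN_INTEGRATION = os.environ.get("INTEGRATION_TEST_ENABLE", "false").lower() == "true"


@pytest.mark.skipif(not RUN_INTEGRATION, reason="integration tests disabled by default")
def test_backend_roundtrip():  # assumed test name, for illustration only
    ...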