tellaro-query-language 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
tql/parser.py CHANGED
@@ -24,6 +24,9 @@ class TQLParser:
     evaluated against data or converted to backend-specific query formats.
     """

+    # Maximum query depth to prevent stack overflow and DoS attacks
+    MAX_QUERY_DEPTH = 50
+
     def __init__(self):
         """Initialize the parser with TQL grammar."""
         self.grammar = TQLGrammar()
@@ -53,7 +56,8 @@ class TQLParser:
             parsed_result = self.grammar.tql_expr.parseString(query, parseAll=True)

             # Convert to our AST format
-            return self._build_ast(parsed_result.asList()[0])
+            # Start depth counting at 0 from parse() entry point
+            return self._build_ast(parsed_result.asList()[0], depth=0)

         except ParseException as e:
             # Extract position and context from pyparsing exception
@@ -114,15 +118,29 @@ class TQLParser:
         # Extract fields using the field extractor
         return self.field_extractor.extract_fields(ast)

-    def _build_ast(self, parsed: Any) -> Dict[str, Any]:  # noqa: C901
+    def _build_ast(self, parsed: Any, depth: int = 0) -> Dict[str, Any]:  # noqa: C901
         """Build AST from parsed pyparsing result.

         Args:
             parsed: The parsed result from pyparsing
+            depth: Current recursion depth (for DoS prevention)

         Returns:
             Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
         """
+        # Check depth limit to prevent stack overflow and DoS attacks
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"],
+            )
+
         if isinstance(parsed, list):
             if len(parsed) == 1:
                 # Single item, check if it's a field with is_private/is_global mutator
@@ -162,7 +180,7 @@ class TQLParser:
                     }
                     return result
                 # Single item, unwrap it
-                return self._build_ast(parsed[0])
+                return self._build_ast(parsed[0], depth + 1)
             elif len(parsed) >= 2 and isinstance(parsed[0], str) and parsed[0].lower() == "stats":
                 # This is a stats expression without filter (applies to all records)
                 return self._build_stats_ast(parsed)
@@ -210,7 +228,7 @@ class TQLParser:
             # Check for NOT operator first (before field | mutator check)
             elif isinstance(first, str) and (first.lower() == "not" or first == "!"):
                 # Unary logical operator (NOT or !)
-                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second)}
+                return {"type": "unary_op", "operator": "not", "operand": self._build_ast(second, depth + 1)}

             # Check for field | mutator without operator
             # This happens when we have a field with mutator(s) as the last element
@@ -267,12 +285,16 @@ class TQLParser:
                    # This is filter | stats
                    return {
                        "type": "query_with_stats",
-                        "filter": self._build_ast(first),
+                        "filter": self._build_ast(first, depth + 1),
                        "stats": self._build_stats_ast(second),
                    }
                else:
                    # Fallback to treating as unary logical operator
-                    return {"type": "unary_op", "operator": first.lower(), "operand": self._build_ast(second)}
+                    return {
+                        "type": "unary_op",
+                        "operator": first.lower(),
+                        "operand": self._build_ast(second, depth + 1),
+                    }
            elif len(parsed) >= 3:
                # Check if this is a field with multiple mutators
                if isinstance(parsed[0], str) and all(
@@ -419,7 +441,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -497,7 +519,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -638,7 +660,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -715,7 +737,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -725,7 +747,7 @@ class TQLParser:
                    return result
                else:
                    # This is a chained operation, not a between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)

            elif len(parsed) == 6:
                # Check for "field not between value1 and value2" or "field ! between value1 and value2"
@@ -814,7 +836,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -824,7 +846,7 @@ class TQLParser:
                    return result
                else:
                    # This is a chained operation, not a not_between operation
-                    return self._build_chained_ast(parsed)
+                    return self._build_chained_ast(parsed, depth + 1)

            elif len(parsed) == 3:
                # Binary operation or comparison (including negated unary operators like "field not exists")
@@ -869,7 +891,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add geo parameters if any
@@ -923,7 +945,7 @@ class TQLParser:
                    "field": field_name,
                    "type_hint": type_hint,
                    "field_mutators": field_mutators,
-                    "conditions": self._build_ast(conditions) if conditions else None,
+                    "conditions": self._build_ast(conditions, depth + 1) if conditions else None,
                }

                # Add nslookup parameters if any
@@ -937,8 +959,8 @@ class TQLParser:
            return {
                "type": "logical_op",
                "operator": operator.lower(),
-                "left": self._build_ast(left),
-                "right": self._build_ast(right),
+                "left": self._build_ast(left, depth + 1),
+                "right": self._build_ast(right, depth + 1),
            }
        elif (
            isinstance(operator, str)
@@ -1189,7 +1211,7 @@ class TQLParser:
            # Handle longer lists (chained operations)
            # This happens with infixNotation for multiple AND/OR operations
            # The structure will be flattened, so we need to reconstruct the tree
-            return self._build_chained_ast(parsed)
+            return self._build_chained_ast(parsed, depth + 1)
        else:
            # Single value - should already be a proper AST node
            if isinstance(parsed, dict):
@@ -1201,21 +1223,34 @@ class TQLParser:
        # This should be unreachable, but helps mypy understand all paths return
        raise AssertionError("Unreachable code in _build_ast")

-    def _build_chained_ast(self, parsed_list: List[Any]) -> Dict[str, Any]:
+    def _build_chained_ast(self, parsed_list: List[Any], depth: int = 0) -> Dict[str, Any]:
        """Build AST from chained operations (e.g., A AND B AND C).

        Args:
            parsed_list: List of alternating operands and operators
+            depth: Current recursion depth (for DoS prevention)

        Returns:
            Dictionary representing the AST node
+
+        Raises:
+            TQLSyntaxError: If query depth exceeds maximum allowed depth
        """
+        # Check depth limit to prevent stack overflow
+        if depth > self.MAX_QUERY_DEPTH:
+            raise TQLSyntaxError(
+                f"Query depth exceeds maximum allowed depth of {self.MAX_QUERY_DEPTH}. "
+                "Please simplify your query to reduce nesting.",
+                position=0,
+                query="",
+                suggestions=["Reduce query nesting depth", "Split into multiple simpler queries"],
+            )
        if len(parsed_list) < 3:
            # Not enough elements for a chained operation
            return {"type": "unknown", "value": parsed_list}

        # Start with the first operand
-        result = self._build_ast(parsed_list[0])
+        result = self._build_ast(parsed_list[0], depth + 1)

        # Process pairs of (operator, operand)
        i = 1
@@ -1228,7 +1263,7 @@ class TQLParser:
                    "type": "logical_op",
                    "operator": operator.lower(),
                    "left": result,
-                    "right": self._build_ast(operand),
+                    "right": self._build_ast(operand, depth + 1),
                }
            else:
                # This shouldn't happen in a well-formed chained expression
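
The parser changes above thread a depth counter through every recursive call in _build_ast and _build_chained_ast, so a pathologically nested query fails with a TQLSyntaxError instead of exhausting the interpreter's call stack. The sketch below is a minimal, standalone illustration of the same guard pattern; the function and exception names are illustrative only and are not part of the TQL package.

# Minimal sketch of the depth-guard pattern (illustrative names, not TQL's API).
MAX_QUERY_DEPTH = 50


class QueryTooDeepError(ValueError):
    """Raised when a parsed expression nests more deeply than allowed."""


def build_ast(parsed, depth=0):
    # Every recursive call passes depth + 1, so the counter tracks real nesting.
    if depth > MAX_QUERY_DEPTH:
        raise QueryTooDeepError(
            f"Query depth exceeds maximum allowed depth of {MAX_QUERY_DEPTH}"
        )
    if isinstance(parsed, list):
        return {"type": "group", "children": [build_ast(p, depth + 1) for p in parsed]}
    return {"type": "value", "value": parsed}


# A query nested 60 levels deep now fails fast with a clear error
# instead of raising RecursionError deep inside the parser.
nested = "x"
for _ in range(60):
    nested = [nested]
try:
    build_ast(nested)
except QueryTooDeepError as exc:
    print(exc)

A fixed constant such as 50 leaves realistic queries untouched while bounding both recursion depth and the work done per query.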
tql/post_processor.py CHANGED
@@ -73,12 +73,17 @@ class QueryPostProcessor:
                field_accessor.get_field_value, evaluator._evaluate_node
            )

+            # Safe access with validation
+            # conditions is guaranteed to exist by the if check above
+            conditions = requirement.metadata["conditions"]
+            nslookup_params = requirement.metadata.get("nslookup_params", {})
+
            # Build node for evaluation
            node = {
                "type": "nslookup_expr",
                "field": requirement.field_name,
-                "conditions": requirement.metadata["conditions"],
-                "nslookup_params": requirement.metadata.get("nslookup_params", {}),
+                "conditions": conditions,
+                "nslookup_params": nslookup_params,
            }

            # Evaluate the nslookup expression
@@ -91,6 +96,7 @@ class QueryPostProcessor:
            and requirement.metadata
            and "conditions" in requirement.metadata
        ):
+            # Safe access - conditions is guaranteed to exist by the if check
            conditions = requirement.metadata["conditions"]
            if conditions:
                # Get the geo data that was enriched
@@ -129,12 +135,15 @@ class QueryPostProcessor:
                # Check if this is an array operator with comparison
                if "comparison_operator" in requirement.metadata:
                    # This is a special case: field | any/all/none eq value
-                    array_operator = requirement.metadata["operator"]
-                    comparison_operator = requirement.metadata["comparison_operator"]
+                    # Safe access - both keys are guaranteed to exist by the if checks
+                    array_operator = requirement.metadata["operator"]  # exists from line 128 check
+                    comparison_operator = requirement.metadata[
+                        "comparison_operator"
+                    ]  # exists from line 135 check
                    value = requirement.metadata.get("value")

-                    # Get the field value
-                    temp_field_name = f"__{requirement.field_name}_mutated__"
+                    # Get the field value with proper nested field handling
+                    temp_field_name = self._get_mutated_field_name(requirement.field_name)
                    field_value = self._get_field_value(result, temp_field_name)
                    if field_value is None:
                        # No mutated value, get original
@@ -148,18 +157,21 @@ class QueryPostProcessor:
                            break
                else:
                    # Regular operator check
+                    # Safe access - operator is guaranteed to exist by the if check at line 134
                    operator = requirement.metadata["operator"]
                    value = requirement.metadata.get("value")

                    # Check if this was originally a different operator (for type-changing mutators)
                    if requirement.metadata.get("_original_comparison"):
+                        # Safe access - validated by .get() check above
                        original = requirement.metadata["_original_comparison"]
-                        operator = original["operator"]
+                        # Validate that operator exists in original
+                        operator = original.get("operator", operator)
                        value = original.get("value", value)

                    # Get the field value - either mutated or original
                    # First check for mutated value in temp field
-                    temp_field_name = f"__{requirement.field_name}_mutated__"
+                    temp_field_name = self._get_mutated_field_name(requirement.field_name)
                    field_value = self._get_field_value(result, temp_field_name)
                    if field_value is None:
                        # No mutated value, get original
@@ -373,7 +385,7 @@ class QueryPostProcessor:
            return False

        # Get the field value
-        temp_field_name = f"__{field_name}_mutated__"
+        temp_field_name = self._get_mutated_field_name(field_name)
        field_value = self._get_field_value(result, temp_field_name)
        if field_value is None:
            # No mutated value, get original
@@ -703,7 +715,7 @@ class QueryPostProcessor:
                self._set_field_value(result, requirement.field_name, mutated_value)
            elif not is_geo_enrichment:
                # For type-changing mutators with filtering operations, store in temp field
-                temp_field_name = f"__{requirement.field_name}_mutated__"
+                temp_field_name = self._get_mutated_field_name(requirement.field_name)
                self._set_field_value(result, temp_field_name, mutated_value)

        # Check if we have any enrichment mutators
@@ -994,6 +1006,25 @@ class QueryPostProcessor:

        return current

+    def _get_mutated_field_name(self, field_name: str) -> str:
+        """Generate the correct mutated field name for nested or flat fields.
+
+        Args:
+            field_name: The original field name (e.g., "user.address.zip" or "status")
+
+        Returns:
+            Mutated field name with proper nesting:
+            - "user.address.zip" -> "user.address.__zip_mutated__"
+            - "status" -> "__status_mutated__"
+        """
+        field_parts = field_name.split(".")
+        if len(field_parts) > 1:
+            # For nested fields, only mutate the leaf field name
+            return ".".join(field_parts[:-1] + [f"__{field_parts[-1]}_mutated__"])
+        else:
+            # For flat fields, mutate the entire name
+            return f"__{field_name}_mutated__"
+
    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

@@ -1140,7 +1171,9 @@ class PostProcessingStats:
class PostProcessingError(Exception):
    """Exception raised during post-processing operations."""

-    def __init__(self, message: str, field_name: Optional[str] = None, mutator_name: Optional[str] = None):
+    def __init__(  # noqa: B042
+        self, message: str, field_name: Optional[str] = None, mutator_name: Optional[str] = None
+    ):
        """Initialize post-processing error.

        Args:
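
The main behavioral fix in this file is _get_mutated_field_name: the temporary field that holds a mutated value is now nested alongside the original leaf field instead of being a single flat key that contains dots. The sketch below duplicates the naming logic outside the class purely for illustration, to show why the old formatting broke nested lookups.

# Standalone copy of the naming logic, for illustration only.
def get_mutated_field_name(field_name: str) -> str:
    parts = field_name.split(".")
    if len(parts) > 1:
        # Only the leaf segment is renamed, so dotted traversal still works.
        return ".".join(parts[:-1] + [f"__{parts[-1]}_mutated__"])
    return f"__{field_name}_mutated__"


assert get_mutated_field_name("status") == "__status_mutated__"
assert get_mutated_field_name("user.address.zip") == "user.address.__zip_mutated__"

# The previous f"__{field_name}_mutated__" formatting produced
# "__user.address.zip_mutated__" for nested fields; a dot-splitting field
# accessor would then look for a top-level key named "__user" and never
# find the mutated value.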
tql/scripts.py CHANGED
@@ -1,5 +1,6 @@
 """ Runs pytest, coverage, linters, and security checks. """

+import os
 import subprocess  # nosec


@@ -29,8 +30,13 @@ def run_coverage():
     Run coverage against all files in the `src` directory
     and output an XML report to `reports/coverage.xml`.
     """
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     # 1. Run pytest with coverage, using `src` as the source
-    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec
+    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True, env=env)  # nosec

     # 2. Generate an XML coverage report in `reports/coverage.xml`
     subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec
@@ -40,7 +46,12 @@ def run_coverage():

 def run_tests():
     """Runs pytests against tests in the `tests` directory."""
-    subprocess.run(["pytest", "tests"], check=True)  # nosec
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
+    subprocess.run(["pytest", "tests"], check=True, env=env)  # nosec


 def run_lint_all():
@@ -84,6 +95,11 @@ def run_lint():

 def run_badge():
     """Generate a badge using genbadge."""
+    # Set environment to skip integration tests by default
+    env = os.environ.copy()
+    if "INTEGRATION_TEST_ENABLE" not in env:
+        env["INTEGRATION_TEST_ENABLE"] = "false"
+
     subprocess.run(  # nosec
         [
             "coverage",
@@ -94,6 +110,7 @@ def run_badge():
             "--junit-xml=reports/junit/junit.xml",
         ],
         check=True,
+        env=env,
     )

     # 2. Generate an XML coverage report in `reports/coverage.xml`
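
The script changes copy the current environment and default INTEGRATION_TEST_ENABLE to "false" before every subprocess pytest invocation, so run_tests, run_coverage, and run_badge skip integration suites unless the caller exports the variable themselves. How the test suite consumes the flag is not shown in this diff; one plausible consumer, sketched here with an assumed test name, would gate integration tests behind a pytest skip marker.

# Hypothetical consumer of INTEGRATION_TEST_ENABLE (not shown in this diff).
import os

import pytest

RUN_INTEGRATION = os.environ.get("INTEGRATION_TEST_ENABLE", "false").lower() == "true"


@pytest.mark.skipif(not RUN_INTEGRATION, reason="integration tests disabled by default")
def test_backend_roundtrip():  # assumed test name, for illustration only
    ...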