tellaro_query_language-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
tql/scripts.py ADDED
@@ -0,0 +1,124 @@
"""Runs pytest, coverage, linters, and security checks."""

import subprocess  # nosec


def get_modified_files_as_set():
    """Get a set of modified files in the current git branch."""
    # Run the git command
    result = subprocess.run(  # nosec
        ["git", "diff", "--name-only", "HEAD"],
        capture_output=True,  # Redirect stdout/stderr
        text=True,  # Decode output to string
        check=False,
    )

    # Strip whitespace and split on newlines
    file_list = result.stdout.strip().split("\n")
    # Remove "pyproject.toml" from the list
    file_list = [f for f in file_list if f != "pyproject.toml"]

    # Convert to a set (filter out any empty strings that might occur)
    modified_files = {f for f in file_list if f}

    return modified_files
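
# Illustration (not from the package): if `git diff --name-only HEAD` prints
# "tql/core.py\npyproject.toml\n", the filtering above yields {"tql/core.py"};
# pyproject.toml and empty strings are dropped.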


def run_coverage():
    """
    Run coverage against all files in the `src` directory
    and output an XML report to `reports/coverage/coverage.xml`.
    """
    # 1. Run pytest with coverage, using `src` as the source
    subprocess.run(["coverage", "run", "--source=src", "-m", "pytest"], check=True)  # nosec

    # 2. Generate an XML coverage report in `reports/coverage/coverage.xml`
    subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec

    print("Coverage completed. XML report generated at reports/coverage/coverage.xml.")


def run_tests():
    """Run pytest against tests in the `tests` directory."""
    subprocess.run(["pytest", "tests"], check=True)  # nosec


def run_lint_all():
    """
    Run black, isort, flake8, and pylint on `src` and `tests`.
    """
    subprocess.run(  # nosec
        ["black", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["isort", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["flake8", "src", "tests"],
        check=False,
    )
    subprocess.run(  # nosec
        ["pylint", "src", "tests"],
        check=False,
    )


def run_lint():
    """
    Run black, isort, flake8, and pylint on files modified in git.
    """
    files = get_modified_files_as_set()
    files_list = list(files)

    if not files_list:
        print("No modified files detected.")
        return

    subprocess.run(["black", *files_list], check=False)  # nosec
    subprocess.run(["isort", *files_list], check=False)  # nosec
    subprocess.run(["flake8", *files_list], check=False)  # nosec
    subprocess.run(["pylint", *files_list], check=False)  # nosec


def run_badge():
    """Generate status badges using genbadge."""
    # 1. Run pytest with coverage and write a JUnit report for genbadge
    subprocess.run(  # nosec
        [
            "coverage",
            "run",
            "--source=src",
            "-m",
            "pytest",
            "--junit-xml=reports/junit/junit.xml",
        ],
        check=True,
    )

    # 2. Generate an XML coverage report in `reports/coverage/coverage.xml`
    subprocess.run(["coverage", "xml", "-o", "reports/coverage/coverage.xml"], check=True)  # nosec

    # 3. Generate a flake8 statistics report in `reports/flake8/flake8stats.txt`
    subprocess.run(  # nosec
        [
            "flake8",
            "--statistics",
            "--output-file=reports/flake8/flake8stats.txt",
            "--extend-exclude",
            ".github,reports,.venv,.vscode",
        ],
        check=False,
    )

    # 4. Generate badge for flake8
    subprocess.run(["genbadge", "flake8", "-o", "badge/flake8-badge.svg"], check=True)  # nosec

    # 5. Generate badge for coverage
    subprocess.run(["genbadge", "coverage", "-o", "badge/coverage-badge.svg"], check=True)  # nosec

    # 6. Generate badge for tests
    subprocess.run(  # nosec
        ["genbadge", "tests", "-t", "90", "-o", "badge/test-badge.svg"],
        check=True,
    )
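
Since these are plain functions, the task runners can also be exercised directly from Python; a minimal sketch (assuming the package is installed and that git and pytest are available on PATH):

from tql.scripts import get_modified_files_as_set, run_tests

print(sorted(get_modified_files_as_set()))  # paths changed since HEAD
run_tests()  # equivalent to running `pytest tests`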
tql/stats_evaluator.py ADDED
@@ -0,0 +1,444 @@
"""Stats evaluator for TQL aggregation queries.

This module provides the TQLStatsEvaluator class for executing statistical
aggregation queries against data records in memory.
"""

import statistics
from collections import defaultdict
from typing import Any, Dict, List, Optional, Union

from .exceptions import TQLError


class TQLStatsEvaluator:
    """Evaluates TQL stats queries against data records.

    This class handles statistical aggregations, grouping, and produces
    results in a UI-friendly format.
    """

    # Aggregation functions that require numeric fields
    NUMERIC_AGGREGATIONS = {
        "sum",
        "min",
        "max",
        "average",
        "avg",
        "median",
        "med",
        "std",
        "standard_deviation",
        "percentile",
        "percentiles",
        "p",
        "pct",
        "percentile_rank",
        "percentile_ranks",
        "pct_rank",
        "pct_ranks",
    }

    # Aggregation functions that work with any field type
    ANY_TYPE_AGGREGATIONS = {"count", "unique_count"}

    # Numeric types supported by OpenSearch
    NUMERIC_TYPES = {
        "long",
        "integer",
        "short",
        "byte",
        "double",
        "float",
        "half_float",
        "scaled_float",
        "unsigned_long",
    }

    def __init__(self):
        """Initialize the stats evaluator."""

    def evaluate_stats(
        self, records: List[Dict[str, Any]], stats_ast: Dict[str, Any], field_mappings: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """Evaluate stats query against records.

        Args:
            records: List of records to aggregate
            stats_ast: Stats AST from parser
            field_mappings: Optional field type mappings

        Returns:
            Aggregated results in UI-friendly format
        """
        aggregations = stats_ast.get("aggregations", [])
        group_by_fields = stats_ast.get("group_by", [])

        # Validate aggregation types against field mappings if provided
        if field_mappings:
            self._validate_aggregations(aggregations, field_mappings)

        if not group_by_fields:
            # Simple aggregation without grouping
            return self._simple_aggregation(records, aggregations)
        else:
            # Grouped aggregation
            return self._grouped_aggregation(records, aggregations, group_by_fields)
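
    # Illustrative usage (not part of the original file). Assuming the parser
    # produced a stats AST for a query along the lines of `count(*) by status`:
    #
    #   evaluator = TQLStatsEvaluator()
    #   records = [{"status": "ok"}, {"status": "ok"}, {"status": "err"}]
    #   ast = {"aggregations": [{"function": "count", "field": "*"}],
    #          "group_by": ["status"]}
    #   evaluator.evaluate_stats(records, ast)
    #   # -> {"type": "grouped_aggregation", "group_by": ["status"],
    #   #     "results": [{"key": {"status": "ok"}, "doc_count": 2, "count": 2},
    #   #                 {"key": {"status": "err"}, "doc_count": 1, "count": 1}]}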

    def _validate_aggregations(self, aggregations: List[Dict[str, Any]], field_mappings: Dict[str, str]) -> None:
        """Validate that aggregation functions are compatible with field types.

        Args:
            aggregations: List of aggregation specifications
            field_mappings: Field type mappings

        Raises:
            TQLError: If aggregation is incompatible with field type
        """
        for agg in aggregations:
            func = agg["function"]
            field = agg["field"]

            # Skip validation for count(*)
            if field == "*":
                continue

            # Check if function requires numeric type
            if func in self.NUMERIC_AGGREGATIONS:
                field_type = field_mappings.get(field, "unknown")

                if field_type not in self.NUMERIC_TYPES and field_type != "unknown":
                    raise TQLError(
                        f"Cannot perform {func}() on non-numeric field '{field}' (type: {field_type}). "
                        f"Use count() or unique_count() for non-numeric fields, or ensure '{field}' "
                        f"is mapped as a numeric type."
                    )
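
    # Illustration (not from the package): with field_mappings ==
    # {"user.name": "keyword"}, a spec {"function": "avg", "field": "user.name"}
    # raises TQLError here, since avg() is in NUMERIC_AGGREGATIONS but
    # "keyword" is not a numeric type.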

    def _simple_aggregation(self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Perform aggregation without grouping.

        Args:
            records: Records to aggregate
            aggregations: Aggregation specifications

        Returns:
            Aggregated results
        """
        if len(aggregations) == 1:
            # Single aggregation
            agg = aggregations[0]
            value = self._calculate_aggregation(records, agg)

            return {
                "type": "simple_aggregation",
                "function": agg["function"],
                "field": agg["field"],
                "alias": agg.get("alias"),
                "value": value,
            }
        else:
            # Multiple aggregations
            results = {}
            for agg in aggregations:
                value = self._calculate_aggregation(records, agg)
                key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
                results[key] = value

            return {"type": "multiple_aggregations", "results": results}
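
    # Illustration (assumed query syntax): without a group_by, a single
    # aggregation such as avg(bytes) comes back as
    #   {"type": "simple_aggregation", "function": "avg", "field": "bytes",
    #    "alias": None, "value": <mean of bytes>}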

    def _grouped_aggregation(
        self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]], group_by_fields: List[str]
    ) -> Dict[str, Any]:
        """Perform aggregation with grouping.

        Args:
            records: Records to aggregate
            aggregations: Aggregation specifications
            group_by_fields: Fields to group by

        Returns:
            Grouped aggregation results
        """
        # Group records
        groups = defaultdict(list)
        for record in records:
            # Build group key
            key_parts = []
            for field in group_by_fields:
                value = self._get_field_value(record, field)
                key_parts.append((field, value))
            key = tuple(key_parts)
            groups[key].append(record)

        # Calculate aggregations for each group
        results = []
        for key, group_records in groups.items():
            group_result: Dict[str, Any] = {"key": dict(key), "doc_count": len(group_records)}

            if len(aggregations) == 1:
                # Single aggregation
                agg = aggregations[0]
                value = self._calculate_aggregation(group_records, agg)
                agg_key = agg.get("alias") or agg["function"]
                group_result[agg_key] = value
            else:
                # Multiple aggregations
                group_result["aggregations"] = {}
                for agg in aggregations:
                    value = self._calculate_aggregation(group_records, agg)
                    agg_key = agg.get("alias") or f"{agg['function']}_{agg['field']}"
                    group_result["aggregations"][agg_key] = value

            results.append(group_result)

        # Apply modifiers (top/bottom)
        results = self._apply_modifiers(results, aggregations)

        return {"type": "grouped_aggregation", "group_by": group_by_fields, "results": results}

    def _calculate_aggregation(  # noqa: C901
        self, records: List[Dict[str, Any]], agg_spec: Dict[str, Any]
    ) -> Union[int, float, Dict[str, Any], None]:
        """Calculate a single aggregation value.

        Args:
            records: Records to aggregate
            agg_spec: Aggregation specification

        Returns:
            Aggregated value
        """
        func = agg_spec["function"]
        field = agg_spec["field"]

        # Handle count(*)
        if func == "count" and field == "*":
            return len(records)

        # Extract field values
        values = []
        for record in records:
            value = self._get_field_value(record, field)
            if value is not None:
                values.append(value)

        # Calculate aggregation
        if func == "count":
            return len(values)
        elif func == "unique_count":
            return len(set(values))
        elif func == "sum":
            return sum(self._to_numeric(v) for v in values) if values else 0
        elif func == "min":
            return min(self._to_numeric(v) for v in values) if values else None
        elif func == "max":
            return max(self._to_numeric(v) for v in values) if values else None
        elif func in ["average", "avg"]:
            if not values:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.mean(numeric_values)
        elif func in ["median", "med"]:
            if not values:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.median(numeric_values)
        elif func in ["std", "standard_deviation"]:
            if len(values) < 2:
                return None
            numeric_values = [self._to_numeric(v) for v in values]
            return statistics.stdev(numeric_values)
        elif func in ["percentile", "percentiles", "p", "pct"]:
            if not values:
                return None
            numeric_values = sorted([self._to_numeric(v) for v in values])
            percentile_values = agg_spec.get("percentile_values", [50])  # Default to median

            if len(percentile_values) == 1:
                # Single percentile
                return self._calculate_percentile(numeric_values, percentile_values[0])
            else:
                # Multiple percentiles - return dict
                result = {}
                for p in percentile_values:
                    result[f"p{int(p)}"] = self._calculate_percentile(numeric_values, p)
                return result
        elif func in ["percentile_rank", "percentile_ranks", "pct_rank", "pct_ranks"]:
            if not values:
                return None
            numeric_values = sorted([self._to_numeric(v) for v in values])
            rank_values = agg_spec.get("rank_values", [])

            if not rank_values:
                raise TQLError("percentile_rank requires at least one value")

            if len(rank_values) == 1:
                # Single rank value
                return self._calculate_percentile_rank(numeric_values, rank_values[0])
            else:
                # Multiple rank values - return dict
                result = {}
                for v in rank_values:
                    result[f"rank_{v}"] = self._calculate_percentile_rank(numeric_values, v)
                return result
        else:
            raise TQLError(f"Unsupported aggregation function: {func}")
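
    # Illustration (not from the package): a spec with
    # agg_spec["percentile_values"] == [50, 95] returns {"p50": ..., "p95": ...},
    # while a single percentile returns the bare number.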

    def _apply_modifiers(
        self, results: List[Dict[str, Any]], aggregations: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Apply top/bottom modifiers to results.

        Args:
            results: Aggregation results
            aggregations: Aggregation specifications with modifiers

        Returns:
            Modified results
        """
        # Check if any aggregation has modifiers
        for agg in aggregations:
            if "modifier" in agg:
                # Sort results based on the aggregation value
                agg_key = agg.get("alias") or agg["function"]

                # Get the value from the result
                def get_sort_value(result, key=agg_key):
                    if "aggregations" in result:
                        return result["aggregations"].get(key, 0)
                    else:
                        return result.get(key, 0)

                # Sort descending for "top", ascending for "bottom"
                reverse = agg["modifier"] == "top"
                results = sorted(results, key=get_sort_value, reverse=reverse)

                # Limit the number of groups returned
                limit = agg.get("limit", 10)
                results = results[:limit]

                break  # Only apply first modifier found

        return results
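
    # Illustration: a spec such as
    # {"function": "count", "field": "*", "modifier": "top", "limit": 5}
    # sorts the groups by their count, descending, and keeps the first five;
    # "bottom" sorts ascending, and the limit defaults to 10.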

    def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
        """Get a field value from a record, supporting nested fields.

        Args:
            record: The record dictionary
            field_path: Dot-separated field path

        Returns:
            The field value or None if not found
        """
        parts = field_path.split(".")
        current = record

        for part in parts:
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return None

        return current
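
    # Illustration: _get_field_value({"src": {"ip": "10.0.0.1"}}, "src.ip")
    # returns "10.0.0.1"; any missing path segment yields None.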

    def _to_numeric(self, value: Any) -> Union[int, float]:
        """Convert value to numeric type.

        Args:
            value: Value to convert

        Returns:
            Numeric value

        Raises:
            TQLError: If value cannot be converted
        """
        if isinstance(value, (int, float)):
            return value

        if isinstance(value, str):
            try:
                # Try int first for values without a decimal point
                if "." not in value:
                    return int(value)
                else:
                    return float(value)
            except ValueError as exc:
                raise TQLError(
                    f"Cannot convert '{value}' to numeric value. Ensure the field contains numeric data."
                ) from exc

        raise TQLError(
            f"Cannot convert {type(value).__name__} to numeric value. Ensure the field contains numeric data."
        )

    def _calculate_percentile(self, sorted_values: List[Union[int, float]], percentile: float) -> Optional[float]:
        """Calculate the percentile value for a sorted list of values.

        Args:
            sorted_values: Sorted list of numeric values
            percentile: Percentile to calculate (0-100)

        Returns:
            The percentile value
        """
        if not sorted_values:
            return None

        if percentile < 0 or percentile > 100:
            raise TQLError(f"Percentile must be between 0 and 100, got {percentile}")

        n = len(sorted_values)
        if n == 1:
            return sorted_values[0]

        # Calculate the position using linear interpolation
        pos = (n - 1) * (percentile / 100.0)
        lower_idx = int(pos)
        upper_idx = min(lower_idx + 1, n - 1)

        if lower_idx == upper_idx:
            return sorted_values[lower_idx]

        # Linear interpolation between two values
        lower_value = sorted_values[lower_idx]
        upper_value = sorted_values[upper_idx]
        fraction = pos - lower_idx

        return lower_value + fraction * (upper_value - lower_value)
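
    # Worked example (illustrative): for sorted_values == [10, 20, 30, 40] and
    # percentile == 25, pos = (4 - 1) * 0.25 = 0.75, so the result interpolates
    # between 10 and 20: 10 + 0.75 * (20 - 10) == 17.5.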

    def _calculate_percentile_rank(self, sorted_values: List[Union[int, float]], value: float) -> Optional[float]:
        """Calculate the percentile rank of a value within a sorted list.

        Args:
            sorted_values: Sorted list of numeric values
            value: Value to find percentile rank for

        Returns:
            The percentile rank (0-100)
        """
        if not sorted_values:
            return None

        n = len(sorted_values)

        # Count how many values are less than, and how many equal to, the target
        count_less = 0
        count_equal = 0

        for v in sorted_values:
            if v < value:
                count_less += 1
            elif v == value:
                count_equal += 1

        # Calculate percentile rank
        # If value is in the list, use midpoint of its range
        if count_equal > 0:
            rank = (count_less + count_equal / 2.0) / n * 100
        else:
            # Value not in list, interpolate
            rank = count_less / n * 100

        return round(rank, 2)
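
    # Worked example (illustrative): for sorted_values == [10, 20, 20, 30] and
    # value == 20, count_less == 1 and count_equal == 2, so the rank is
    # (1 + 2 / 2.0) / 4 * 100 == 50.0.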