tellaro-query-language 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tql/stats_evaluator.py CHANGED
@@ -147,33 +147,56 @@ class TQLStatsEvaluator:
         return {"type": "multiple_aggregations", "results": results}
 
     def _grouped_aggregation(
-        self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]], group_by_fields: List[str]
+        self, records: List[Dict[str, Any]], aggregations: List[Dict[str, Any]], group_by_fields: List[Any]
     ) -> Dict[str, Any]:
         """Perform aggregation with grouping.
 
         Args:
             records: Records to aggregate
             aggregations: Aggregation specifications
-            group_by_fields: Fields to group by
+            group_by_fields: Fields to group by (can be strings or dicts with bucket_size)
 
         Returns:
             Grouped aggregation results
         """
+        # Normalize group_by_fields to handle both old (string) and new (dict) formats
+        normalized_fields = []
+        for field in group_by_fields:
+            if isinstance(field, str):
+                # Old format: just field name
+                normalized_fields.append({"field": field, "bucket_size": None})
+            elif isinstance(field, dict):
+                # New format: {"field": "name", "bucket_size": N}
+                normalized_fields.append(field)
+            else:
+                # Shouldn't happen but handle gracefully
+                normalized_fields.append({"field": str(field), "bucket_size": None})
+
         # Group records
         groups = defaultdict(list)
+        key_mapping = {}  # Maps hashable key to original key
+
         for record in records:
             # Build group key
             key_parts = []
-            for field in group_by_fields:
-                value = self._get_field_value(record, field)
-                key_parts.append((field, value))
-            key = tuple(key_parts)
-            groups[key].append(record)
+            for field_spec in normalized_fields:
+                field_name = field_spec["field"]
+                value = self._get_field_value(record, field_name)
+                key_parts.append((field_name, value))
+
+            # Create hashable key - convert unhashable values to strings
+            hashable_key = self._make_hashable_key(key_parts)
+            groups[hashable_key].append(record)
+
+            # Store mapping from hashable key to original key
+            if hashable_key not in key_mapping:
+                key_mapping[hashable_key] = key_parts
 
         # Calculate aggregations for each group
         results = []
-        for key, group_records in groups.items():
-            group_result: Dict[str, Any] = {"key": dict(key), "doc_count": len(group_records)}
+        for hashable_key, group_records in groups.items():
+            original_key = key_mapping[hashable_key]
+            group_result: Dict[str, Any] = {"key": dict(original_key), "doc_count": len(group_records)}
 
             if len(aggregations) == 1:
                 # Single aggregation
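
A minimal sketch of what the normalization step above accepts and produces (the field names are hypothetical, used only for illustration):

    # Old format: bare field names
    group_by_fields = ["source.ip", "event.action"]
    # normalizes to:
    # [{"field": "source.ip", "bucket_size": None},
    #  {"field": "event.action", "bucket_size": None}]

    # New format: dicts carrying an optional per-field bucket limit
    group_by_fields = [{"field": "source.ip", "bucket_size": 10}]
    # passes through the isinstance(field, dict) branch unchanged

Because every spec is normalized before grouping, mixed lists of strings and dicts build the same kind of group key.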
@@ -194,7 +217,23 @@ class TQLStatsEvaluator:
         # Apply modifiers (top/bottom)
         results = self._apply_modifiers(results, aggregations)
 
-        return {"type": "grouped_aggregation", "group_by": group_by_fields, "results": results}
+        # Apply per-field bucket limits
+        results = self._apply_bucket_limits(results, normalized_fields)
+
+        # Extract just the field names for the response to ensure compatibility
+        # with frontend code that expects strings, not dictionaries
+        group_by_field_names = []
+        for field in group_by_fields:
+            if isinstance(field, str):
+                group_by_field_names.append(field)
+            elif isinstance(field, dict) and "field" in field:
+                group_by_field_names.append(field["field"])
+            else:
+                # Fallback for unexpected formats
+                group_by_field_names.append(str(field))
+
+        # Return group_by fields as strings for frontend compatibility
+        return {"type": "grouped_aggregation", "group_by": group_by_field_names, "results": results}
 
     def _calculate_aggregation(  # noqa: C901
         self, records: List[Dict[str, Any]], agg_spec: Dict[str, Any]
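
The net effect of this hunk is backward compatibility: "group_by" in the response is always a list of strings, even when the caller supplied dict specs. A hedged sketch of a call and its result shape (the evaluator instance, records, and aggregation specs are assumed placeholders, not values from this package):

    result = evaluator._grouped_aggregation(
        records,
        aggregations=aggs,  # spec shape is defined elsewhere in this module
        group_by_fields=[{"field": "source.ip", "bucket_size": 10}],
    )
    # result["type"] == "grouped_aggregation"
    # result["group_by"] == ["source.ip"]   <- plain strings for the frontend
    # result["results"] is a bucket-limited list of {"key": ..., "doc_count": ...} entries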
@@ -226,7 +265,7 @@ class TQLStatsEvaluator:
         if func == "count":
             return len(values)
         elif func == "unique_count":
-            return len(set(values))
+            return len(self._get_unique_values(values))
         elif func == "sum":
             return sum(self._to_numeric(v) for v in values) if values else 0
         elif func == "min":
@@ -283,7 +322,7 @@ class TQLStatsEvaluator:
             return result
         elif func in ["values", "unique", "cardinality"]:
             # Return unique values from the field
-            unique_values = list(set(values)) if values else []
+            unique_values = self._get_unique_values(values)
             # Sort the values for consistent output
             try:
                 # Try to sort if values are comparable
@@ -332,6 +371,81 @@ class TQLStatsEvaluator:
 
         return results
 
+    def _apply_bucket_limits(
+        self, results: List[Dict[str, Any]], normalized_fields: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Apply per-field bucket size limits to results.
+
+        Args:
+            results: Aggregation results
+            normalized_fields: Group by fields with bucket_size specifications
+
+        Returns:
+            Results with bucket limits applied
+        """
+        # Check if we have any bucket size limits
+        has_limits = any(field.get("bucket_size") is not None for field in normalized_fields)
+        if not has_limits:
+            return results
+
+        # For single-level grouping, apply the limit directly
+        if len(normalized_fields) == 1:
+            bucket_size = normalized_fields[0].get("bucket_size")
+            if bucket_size:
+                # Sort by doc_count (most common pattern) and limit
+                results = sorted(results, key=lambda x: x.get("doc_count", 0), reverse=True)
+                results = results[:bucket_size]
+            return results
+
+        # For multi-level grouping, we need to apply limits hierarchically
+        # First, group results by each level and apply limits
+
+        # Sort all results by doc_count first
+        results = sorted(results, key=lambda x: x.get("doc_count", 0), reverse=True)
+
+        # Build a hierarchical structure to apply limits at each level
+        filtered_results = []
+
+        # Track unique values at each level
+        level_values = {}
+        for level, field_spec in enumerate(normalized_fields):
+            level_values[level] = {}
+
+        for result in results:
+            # Check if this result should be included based on bucket limits at each level
+            should_include = True
+
+            # Build the key path for this result
+            key_path = []
+            for level, field_spec in enumerate(normalized_fields):
+                field_name = field_spec["field"]
+                field_value = result["key"].get(field_name)
+                key_path.append(field_value)
+
+                # For each level, check if we've hit the bucket limit
+                bucket_size = field_spec.get("bucket_size")
+                if bucket_size is not None:
+                    # Build parent key (all fields up to but not including current level)
+                    parent_key = tuple(key_path[:level]) if level > 0 else ()
+
+                    # Initialize tracking for this parent if needed
+                    if parent_key not in level_values[level]:
+                        level_values[level][parent_key] = set()
+
+                    # Check if adding this value would exceed the bucket limit
+                    if field_value not in level_values[level][parent_key]:
+                        if len(level_values[level][parent_key]) >= bucket_size:
+                            should_include = False
+                            break
+                        else:
+                            # Reserve this slot
+                            level_values[level][parent_key].add(field_value)
+
+            if should_include:
+                filtered_results.append(result)
+
+        return filtered_results
+
     def _get_field_value(self, record: Dict[str, Any], field_path: str) -> Any:
         """Get a field value from a record, supporting nested fields.
 
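To make the hierarchical limiting concrete, here is a small worked example under assumed inputs (field names and counts are invented). With normalized_fields = [{"field": "country", "bucket_size": 2}, {"field": "city", "bucket_size": 1}] and results already sorted by doc_count:

    {"key": {"country": "US", "city": "NYC"},    "doc_count": 9}  # kept: 1st country, 1st city under US
    {"key": {"country": "US", "city": "LA"},     "doc_count": 7}  # dropped: US already holds its 1 city
    {"key": {"country": "DE", "city": "Berlin"}, "doc_count": 5}  # kept: 2nd country
    {"key": {"country": "FR", "city": "Paris"},  "doc_count": 3}  # dropped: country limit of 2 reached

Slots are reserved greedily in doc_count order, so the highest-volume buckets at each level survive the cut.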
@@ -453,3 +567,47 @@ class TQLStatsEvaluator:
         rank = count_less / n * 100
 
         return round(rank, 2)
+
+    def _get_unique_values(self, values: List[Any]) -> List[Any]:
+        """Get unique values from a list, handling unhashable types like dicts.
+
+        Args:
+            values: List of values that may contain unhashable types
+
+        Returns:
+            List of unique values
+        """
+        if not values:
+            return []
+
+        # Try the fast path first - use set if all values are hashable
+        try:
+            return list(set(values))
+        except TypeError:
+            # Some values are unhashable, use slower but safe approach
+            unique_values = []
+            for value in values:
+                if value not in unique_values:
+                    unique_values.append(value)
+            return unique_values
+
+    def _make_hashable_key(self, key_parts: List[tuple]) -> tuple:
+        """Convert a key with potentially unhashable values to a hashable key.
+
+        Args:
+            key_parts: List of (field_name, value) tuples
+
+        Returns:
+            Hashable tuple that can be used as a dictionary key
+        """
+        hashable_parts = []
+        for field_name, value in key_parts:
+            try:
+                # Try to hash the value - if it works, use it as-is
+                hash(value)
+                hashable_parts.append((field_name, value))
+            except TypeError:
+                # Value is unhashable (like dict), convert to string representation
+                hashable_parts.append((field_name, str(value)))
+
+        return tuple(hashable_parts)
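
Both helpers exist because grouped field values can be dicts or lists, which cannot go into a set() or serve as dict keys. A quick illustration (evaluator is a hypothetical TQLStatsEvaluator instance; the values are made up):

    vals = [{"id": 1}, {"id": 1}, {"id": 2}]
    set(vals)  # would raise TypeError: unhashable type: 'dict'
    evaluator._get_unique_values(vals)
    # -> [{"id": 1}, {"id": 2}]  (falls back to the O(n^2) membership scan)

    evaluator._make_hashable_key([("tags", ["a", "b"])])
    # -> (("tags", "['a', 'b']"),)  the list is stringified so it can key the defaultdict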