tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,399 @@
1
+ """Field mapping support for OpenSearch backend.
2
+
3
+ This module provides the FieldMapping class for intelligent field selection
4
+ based on operators and field types in OpenSearch.
5
+ """
6
+
7
+ from typing import Any, Dict, Optional
8
+
9
+ from ..exceptions import TQLTypeError, TQLUnsupportedOperationError
10
+
11
+
12
class FieldMapping:
    """Represents field mapping information for intelligent field selection.

    A mapping describes one logical field and its typed variants (for
    example a ``keyword`` variant and one or more analyzed ``text``
    variants). Given a TQL operator, the mapping picks the variant that
    should be queried.

    Attributes set by ``__init__``:
        mappings: The raw mapping dictionary passed to the constructor.
        keyword_field: Path of the keyword variant, or None if there is none.
        text_fields: Analyzer name -> path of the text variant using it.
        default_analyzer: Top-level "analyzer" entry, "standard" if absent.
        field_types: Field path -> declared type, for every known variant.
        base_field_name: Main field name without suffixes. For
            OpenSearch-style mappings this is the placeholder ``""`` until
            ``set_base_field_name`` is called.
    """

    # Keys allowed in an OpenSearch-style single-field mapping.
    _OPENSEARCH_STYLE_KEYS = frozenset({"type", "fields", "analyzer"})

    # Operators that work best with keyword fields (exact matching).
    _KEYWORD_OPERATORS = frozenset(
        {
            "eq",
            "=",
            "ne",
            "!=",
            "in",
            "not_in",
            "exists",
            "not_exists",
            "is",
            "any",
            "all",
            "not_any",
            "not_all",
        }
    )
    # Operators that work best with text fields (full-text search).
    _TEXT_OPERATORS = frozenset({"contains", "regexp", "not_regexp"})
    # Operators that require numeric/date fields (keyword is accepted as a fallback).
    _RANGE_OPERATORS = frozenset({">", ">=", "<", "<=", "gt", "gte", "lt", "lte", "between", "not_between"})
    # Operators that work with both keyword and text but prefer keyword.
    _WILDCARD_OPERATORS = frozenset({"startswith", "endswith"})
    # Operators that prefer an IP field and fall back to keyword.
    _CIDR_OPERATORS = frozenset({"cidr", "not_cidr"})

    # Field types that range operators can target directly.
    _RANGE_TYPES = frozenset({"integer", "long", "float", "double", "date"})
    # Non-keyword field types that still support equality-style operators.
    _EQUALITY_TYPES = _RANGE_TYPES | frozenset({"boolean", "ip"})

    def __init__(self, mapping_info: Dict[str, Any]):
        """Initialize field mapping.

        Args:
            mapping_info: Dictionary containing field mapping information.
                Supports multiple formats:

                1. OpenSearch-style mapping with subfields:
                   {
                       "type": "text",
                       "fields": {
                           "keyword": {"type": "keyword"},
                           "english": {"type": "text", "analyzer": "english"}
                       }
                   }

                2. Flat format with field variants:
                   {
                       "field_name": "keyword",
                       "field_name.text": {"type": "text", "analyzer": "standard"},
                       "field_name.english": {"type": "text", "analyzer": "english"}
                   }

                3. Legacy format:
                   {
                       "field_name": "keyword",
                       "field_name.text": "text",
                       "analyzer": "standard"
                   }
        """
        self.mappings = mapping_info
        self.keyword_field: Optional[str] = None
        self.text_fields: Dict[str, str] = {}  # analyzer -> field_name mapping
        self.default_analyzer = mapping_info.get("analyzer", "standard")
        self.field_types: Dict[str, Any] = {}  # field_name -> type mapping for all fields
        self.base_field_name: Optional[str] = None  # The main field name without suffixes

        # A mapping is OpenSearch-style when it has "type" and nothing besides
        # the three known keys. Membership is tested on the key itself so an
        # unexpected falsy key (e.g. "") is not silently ignored.
        is_opensearch_style = "type" in mapping_info and all(
            key in self._OPENSEARCH_STYLE_KEYS for key in mapping_info
        )
        if is_opensearch_style:
            self._load_opensearch_mapping(mapping_info)
        else:
            self._load_flat_mapping(mapping_info)

    def _register_field(self, field_path: str, field_type: Any, analyzer: str) -> None:
        """Record one field variant and index it as keyword/text when applicable."""
        self.field_types[field_path] = field_type
        if field_type == "keyword":
            self.keyword_field = field_path
        elif field_type == "text":
            self.text_fields[analyzer] = field_path

    def _load_opensearch_mapping(self, mapping_info: Dict[str, Any]) -> None:
        """Parse an OpenSearch-style mapping using "" as the base-name placeholder."""
        # The real base name is unknown here; set_base_field_name() fills it in
        # later. Until then the base field is "" and subfields are ".<name>".
        self.base_field_name = ""
        self._register_field("", mapping_info["type"], mapping_info.get("analyzer", "standard"))

        for subfield_name, subfield_config in mapping_info.get("fields", {}).items():
            if not isinstance(subfield_config, dict):
                continue
            subfield_type = subfield_config.get("type")
            if subfield_type:
                self._register_field(
                    f".{subfield_name}", subfield_type, subfield_config.get("analyzer", "standard")
                )

    def _load_flat_mapping(self, mapping_info: Dict[str, Any]) -> None:
        """Parse the flat and legacy formats, where each key is a field path."""
        for field_name, field_config in mapping_info.items():
            if field_name == "analyzer":
                continue

            # The base name is the part before the first dot of the first
            # field that does not start with an underscore.
            if not self.base_field_name and not field_name.startswith("_"):
                self.base_field_name = field_name.split(".")[0]

            if isinstance(field_config, dict):
                # New format: {"type": "text", "analyzer": "english"}
                field_type = field_config.get("type")
                if field_type:
                    self._register_field(field_name, field_type, field_config.get("analyzer", "standard"))
            else:
                # Legacy format: the value is the type itself; text variants
                # use the mapping-wide default analyzer.
                self._register_field(field_name, field_config, self.default_analyzer)

    @staticmethod
    def _rebase_path(field_path: str, base_field_name: str) -> str:
        """Replace the "" placeholder prefix of ``field_path`` with ``base_field_name``."""
        if field_path == "":
            return base_field_name
        if field_path.startswith("."):
            return f"{base_field_name}{field_path}"
        return field_path

    def set_base_field_name(self, base_field_name: str):
        """Set the base field name and update field paths for OpenSearch-style mappings.

        Has no effect unless the current base name is the ``""`` placeholder.

        Args:
            base_field_name: The base field name to use
        """
        if self.base_field_name != "":  # Only update if it was a placeholder
            return

        self.base_field_name = base_field_name
        self.field_types = {
            self._rebase_path(path, base_field_name): field_type for path, field_type in self.field_types.items()
        }
        if self.keyword_field is not None:
            self.keyword_field = self._rebase_path(self.keyword_field, base_field_name)
        self.text_fields = {
            analyzer: self._rebase_path(path, base_field_name) for analyzer, path in self.text_fields.items()
        }

    def get_text_field_for_analyzer(self, preferred_analyzer: Optional[str] = None) -> Optional[str]:
        """Get the best text field for the given analyzer preference.

        Lookup order: the preferred analyzer, the default analyzer,
        "standard", then any available text field.

        Args:
            preferred_analyzer: Preferred analyzer (e.g., 'english', 'autocomplete')

        Returns:
            Field name for the best matching text field, or None if no text fields
        """
        if not self.text_fields:
            return None

        if preferred_analyzer and preferred_analyzer in self.text_fields:
            return self.text_fields[preferred_analyzer]

        for analyzer in (self.default_analyzer, "standard"):
            if analyzer in self.text_fields:
                return self.text_fields[analyzer]

        # Return any available text field
        return next(iter(self.text_fields.values()))

    def _first_field_of_type(self, wanted_types: frozenset) -> Optional[str]:
        """Return the first field whose type is in ``wanted_types``, or None."""
        return next(
            (name for name, field_type in self.field_types.items() if field_type in wanted_types),
            None,
        )

    def get_field_for_operator(self, operator: str, preferred_analyzer: Optional[str] = None) -> str:  # noqa: C901
        """Get the appropriate field name for the given operator.

        Args:
            operator: The TQL operator being used
            preferred_analyzer: Preferred analyzer for text operations

        Returns:
            The field name to use

        Raises:
            TQLTypeError: If a range operator is used and only text fields exist
            TQLUnsupportedOperationError: If operator is not supported for available fields
        """
        # NOTE: keyword_field is tested by truthiness throughout, so the ""
        # placeholder of an OpenSearch-style mapping whose base name has not
        # been set yet is treated as "no keyword field" here.
        if operator in self._KEYWORD_OPERATORS:
            if self.keyword_field:
                return self.keyword_field
            # Numeric/boolean/date/IP fields also support equality.
            typed_field = self._first_field_of_type(self._EQUALITY_TYPES)
            if typed_field is not None:
                return typed_field
            # Fallback to any available text field
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
            # Otherwise the first non-empty field name of any type,
            # then the base field name, then the empty string as last resort.
            for field_name in self.field_types:
                if field_name:
                    return field_name
            return self.base_field_name or ""

        if operator in self._TEXT_OPERATORS:
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
            if self.keyword_field:
                # Will need special handling for wildcard conversion
                return self.keyword_field
        elif operator in self._WILDCARD_OPERATORS:
            # Prefer keyword for wildcard operations
            if self.keyword_field:
                return self.keyword_field
            text_field = self.get_text_field_for_analyzer(preferred_analyzer)
            if text_field:
                return text_field
        elif operator in self._RANGE_OPERATORS:
            # Range operators prefer numeric/date fields but can work with keyword fields
            range_field = self._first_field_of_type(self._RANGE_TYPES)
            if range_field is not None:
                return range_field
            if self.keyword_field:
                return self.keyword_field
            # Only text fields available - this won't work
            if self.text_fields:
                raise TQLTypeError(
                    field=self.base_field_name or "field",
                    field_type="text",
                    operator=operator,
                    valid_operators=["=", "!=", "contains", "startswith", "endswith"],
                )
        elif operator in self._CIDR_OPERATORS:
            # CIDR works best with IP field type; keyword is the fallback
            for field_name, field_type in self.field_types.items():
                if field_type == "ip":
                    return field_name
            if self.keyword_field:
                return self.keyword_field
            raise TQLUnsupportedOperationError("CIDR operator requires keyword or IP field type")

        # If we get here, no suitable field was found
        available_types = []
        if self.keyword_field:
            available_types.append(f"{self.keyword_field}(keyword)")
        for analyzer, field_name in self.text_fields.items():
            available_types.append(f"{field_name}(text:{analyzer})")

        raise TQLUnsupportedOperationError(
            f"Operator '{operator}' is not supported for available field types: {available_types}"
        )

    def needs_wildcard_conversion(self, operator: str, preferred_analyzer: Optional[str] = None) -> bool:
        """Check if operator needs wildcard conversion for keyword fields.

        Field selection runs first, so any exception raised by
        ``get_field_for_operator`` propagates from here as well.

        Args:
            operator: The TQL operator
            preferred_analyzer: Preferred analyzer for text operations

        Returns:
            True if ``operator`` is "contains", the selected field is the
            keyword field, and no text fields exist
        """
        selected_field = self.get_field_for_operator(operator, preferred_analyzer)
        return operator == "contains" and selected_field == self.keyword_field and not self.text_fields

    def validate_operator_for_field_type(self, operator: str, raise_on_error: bool = True) -> bool:
        """Validate if an operator is compatible with available field types.

        Only range and CIDR operators are checked; every other operator is
        reported as compatible.

        Args:
            operator: The TQL operator to validate
            raise_on_error: If True, raise TQLTypeError on incompatibility

        Returns:
            True if operator is compatible, False otherwise

        Raises:
            TQLTypeError: If operator is incompatible and raise_on_error is True
        """
        has_keyword = self.keyword_field is not None

        if operator in self._RANGE_OPERATORS:
            # Uses the same range-capable types as get_field_for_operator, so a
            # date-only mapping is accepted here too.
            has_range_type = any(ft in self._RANGE_TYPES for ft in self.field_types.values())

            # Range operators also work on keyword fields; the error is only
            # raised when text fields are all that is available.
            if not has_range_type and not has_keyword:
                if self.text_fields and raise_on_error:
                    raise TQLTypeError(
                        field=self.base_field_name or "field",
                        field_type="text",
                        operator=operator,
                        valid_operators=["=", "!=", "contains", "startswith", "endswith"],
                    )
                if not raise_on_error:
                    return False

        # CIDR operators (including the negated form) require an IP or keyword field
        if operator in self._CIDR_OPERATORS:
            has_ip = any(ft == "ip" for ft in self.field_types.values())

            if not has_ip and not has_keyword:
                if raise_on_error:
                    field_type = next(iter(self.field_types.values()), "unknown")
                    raise TQLTypeError(
                        field=self.base_field_name or "field",
                        field_type=field_type,
                        operator=operator,
                        valid_operators=["=", "!=", "contains"] if field_type == "text" else [],
                    )
                return False

        return True