tellaro-query-language 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
  2. tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
  3. tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
  4. tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
  5. tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
  6. tql/__init__.py +47 -0
  7. tql/analyzer.py +385 -0
  8. tql/cache/__init__.py +7 -0
  9. tql/cache/base.py +25 -0
  10. tql/cache/memory.py +63 -0
  11. tql/cache/redis.py +68 -0
  12. tql/core.py +929 -0
  13. tql/core_components/README.md +92 -0
  14. tql/core_components/__init__.py +20 -0
  15. tql/core_components/file_operations.py +113 -0
  16. tql/core_components/opensearch_operations.py +869 -0
  17. tql/core_components/stats_operations.py +200 -0
  18. tql/core_components/validation_operations.py +599 -0
  19. tql/evaluator.py +379 -0
  20. tql/evaluator_components/README.md +131 -0
  21. tql/evaluator_components/__init__.py +17 -0
  22. tql/evaluator_components/field_access.py +176 -0
  23. tql/evaluator_components/special_expressions.py +296 -0
  24. tql/evaluator_components/value_comparison.py +315 -0
  25. tql/exceptions.py +160 -0
  26. tql/geoip_normalizer.py +233 -0
  27. tql/mutator_analyzer.py +830 -0
  28. tql/mutators/__init__.py +222 -0
  29. tql/mutators/base.py +78 -0
  30. tql/mutators/dns.py +316 -0
  31. tql/mutators/encoding.py +218 -0
  32. tql/mutators/geo.py +363 -0
  33. tql/mutators/list.py +212 -0
  34. tql/mutators/network.py +163 -0
  35. tql/mutators/security.py +225 -0
  36. tql/mutators/string.py +165 -0
  37. tql/opensearch.py +78 -0
  38. tql/opensearch_components/README.md +130 -0
  39. tql/opensearch_components/__init__.py +17 -0
  40. tql/opensearch_components/field_mapping.py +399 -0
  41. tql/opensearch_components/lucene_converter.py +305 -0
  42. tql/opensearch_components/query_converter.py +775 -0
  43. tql/opensearch_mappings.py +309 -0
  44. tql/opensearch_stats.py +451 -0
  45. tql/parser.py +1363 -0
  46. tql/parser_components/README.md +72 -0
  47. tql/parser_components/__init__.py +20 -0
  48. tql/parser_components/ast_builder.py +162 -0
  49. tql/parser_components/error_analyzer.py +101 -0
  50. tql/parser_components/field_extractor.py +112 -0
  51. tql/parser_components/grammar.py +473 -0
  52. tql/post_processor.py +737 -0
  53. tql/scripts.py +124 -0
  54. tql/stats_evaluator.py +444 -0
  55. tql/stats_transformer.py +184 -0
  56. tql/validators.py +110 -0
@@ -0,0 +1,233 @@
1
+ """GeoIP data normalization for TQL.
2
+
3
+ This module normalizes GeoIP data from different MMDB providers (MaxMind and DB-IP)
4
+ into a consistent format for TQL queries.
5
+ """
6
+
7
+ from typing import Any, Dict, Optional
8
+
9
+
10
class GeoIPNormalizer:
    """Normalize GeoIP records from different MMDB providers to an ECS-style layout.

    Supported providers are ``"maxmind"`` (GeoLite2/GeoIP2) and ``"dbip"``.
    Field names follow the Elastic Common Schema (ECS):

    - geo.* fields: https://www.elastic.co/guide/en/ecs/current/ecs-geo.html
    - as.* fields: https://www.elastic.co/guide/en/ecs/current/ecs-as.html

    Values that have no ECS equivalent (``connection_type``, ``user_type`` and,
    for DB-IP, ``is_eu``, ``weather_code``, ``county_name``) are returned as
    top-level keys next to ``geo`` and ``as``.
    """

    @staticmethod
    def normalize(
        raw_data: Optional[Dict[str, Any]], provider: str, mmdb_type: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Normalize one raw MMDB lookup result.

        Args:
            raw_data: Raw record from an MMDB lookup. Falsy or non-dict input
                yields ``None``.
            provider: Either ``'maxmind'`` or ``'dbip'``. Any other value yields
                ``None`` because no fields are extracted for unknown providers.
            mmdb_type: Full type identifier (e.g., ``'dbip_pro'``,
                ``'maxmind_lite'``). When given, it is stored as ``mmdb_type``
                inside each of the ``geo`` and ``as`` sub-dicts that is emitted.

        Returns:
            A dict with optional ``geo`` and ``as`` sub-dicts plus optional custom
            top-level keys, with ``None`` values removed; ``None`` when nothing
            could be extracted.
        """
        if not raw_data or not isinstance(raw_data, dict):
            return None
        if provider not in ("maxmind", "dbip"):
            return None

        # Both providers share the same record layout; DB-IP only adds a few
        # extra fields, so a single flag selects those extras.
        is_dbip = provider == "dbip"

        normalized: Dict[str, Any] = {}
        # The helpers write non-ECS custom fields straight into ``normalized``.
        # Geo is extracted before AS so custom keys keep their historical order.
        geo = GeoIPNormalizer._extract_geo(raw_data, normalized, is_dbip)
        as_info = GeoIPNormalizer._extract_as(raw_data, normalized, is_dbip)

        # Nested structure allows natural access such as geo.country_iso_code.
        if geo:
            if mmdb_type:
                geo["mmdb_type"] = mmdb_type
            normalized["geo"] = geo
        if as_info:
            if mmdb_type:
                as_info["mmdb_type"] = mmdb_type
            normalized["as"] = as_info

        return normalized if normalized else None

    @staticmethod
    def _record(container: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return ``container[key]`` if it is a dict, otherwise an empty dict."""
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _english_name(record: Dict[str, Any]) -> Any:
        """Return ``record["names"]["en"]``, or ``None`` when it is unavailable."""
        names = record.get("names")
        return names.get("en") if isinstance(names, dict) else None

    @staticmethod
    def _extract_geo(raw_data: Dict[str, Any], custom: Dict[str, Any], is_dbip: bool) -> Dict[str, Any]:
        """Build the ``geo`` sub-dict (``None`` values dropped); add DB-IP extras to ``custom``."""
        record = GeoIPNormalizer._record
        english = GeoIPNormalizer._english_name

        country = record(raw_data, "country")
        location = record(raw_data, "location")
        continent = record(raw_data, "continent")

        subdivisions = raw_data.get("subdivisions")
        if not isinstance(subdivisions, (list, tuple)):
            subdivisions = []
        # The first subdivision is treated as the primary region (state/province).
        region = subdivisions[0] if subdivisions and isinstance(subdivisions[0], dict) else {}

        # Coordinates are only emitted as a pair; a lone lat or lon is dropped.
        latitude = location.get("latitude")
        longitude = location.get("longitude")
        point = None
        if latitude is not None and longitude is not None:
            point = {"lat": latitude, "lon": longitude}

        geo = {
            "country_iso_code": country.get("iso_code"),
            "country_name": english(country),
            "city_name": english(record(raw_data, "city")),
            "postal_code": record(raw_data, "postal").get("code"),
            "location": point,
            "timezone": location.get("time_zone"),
            "region_name": english(region),
            "region_iso_code": region.get("iso_code"),
            "continent_code": continent.get("code"),
            "continent_name": english(continent),
        }

        if is_dbip:
            # EU membership is a boolean, so test against None rather than truthiness.
            if country.get("is_in_european_union") is not None:
                custom["is_eu"] = country["is_in_european_union"]
            if location.get("weather_code"):
                custom["weather_code"] = location["weather_code"]
            # DB-IP may list a second subdivision (county); keep it as a custom field.
            if len(subdivisions) > 1 and isinstance(subdivisions[1], dict):
                county_name = english(subdivisions[1])
                if county_name:
                    custom["county_name"] = county_name

        return {key: value for key, value in geo.items() if value is not None}

    @staticmethod
    def _extract_as(raw_data: Dict[str, Any], custom: Dict[str, Any], is_dbip: bool) -> Dict[str, Any]:
        """Build the ``as`` sub-dict (``None`` values dropped); add trait extras to ``custom``."""
        as_info: Dict[str, Any] = {}

        # Top-level ASN keys (ASN databases or merged data) are copied when present.
        if "autonomous_system_number" in raw_data:
            as_info["number"] = raw_data["autonomous_system_number"]
        if "autonomous_system_organization" in raw_data:
            as_info["organization"] = {"name": raw_data["autonomous_system_organization"]}

        # Truthy values found under ``traits`` take precedence over the top-level ones.
        traits = GeoIPNormalizer._record(raw_data, "traits")
        if traits.get("autonomous_system_number"):
            as_info["number"] = traits["autonomous_system_number"]
        if traits.get("autonomous_system_organization"):
            as_info["organization"] = {"name": traits["autonomous_system_organization"]}
        if traits.get("isp"):
            # ISP is not part of standard ECS; it is stored inside organization.
            as_info.setdefault("organization", {})["isp"] = traits["isp"]
        if is_dbip and traits.get("organization"):
            # NOTE(review): this overwrites the AS organization name with the
            # traits organization rather than storing it separately; kept as-is
            # for backward compatibility - confirm intended behavior.
            as_info.setdefault("organization", {})["name"] = traits["organization"]

        # Additional non-ECS fields.
        for extra in ("connection_type", "user_type"):
            if traits.get(extra):
                custom[extra] = traits[extra]

        cleaned: Dict[str, Any] = {}
        for key, value in as_info.items():
            if value is None:
                continue
            if key == "organization" and isinstance(value, dict):
                value = {k: v for k, v in value.items() if v is not None}
                if not value:
                    continue
            cleaned[key] = value
        return cleaned

    @staticmethod
    def merge_data(primary: Dict[str, Any], *additional: Dict[str, Any]) -> Dict[str, Any]:
        """Merge raw records from multiple MMDB files into one dict.

        The merge is shallow and first-wins: a top-level key already present
        (even with a ``None`` value) is never overwritten by a later source.

        Args:
            primary: Primary data source (usually city data). A falsy value is
                treated as an empty dict.
            *additional: Additional data sources (ASN, country, etc.). Falsy
                entries are skipped.

        Returns:
            A new dict; ``primary`` itself is not mutated.
        """
        result = primary.copy() if primary else {}

        for data in additional:
            if not data:
                continue
            for key, value in data.items():
                # Only fill in keys that are missing so far.
                result.setdefault(key, value)

        return result