tellaro-query-language 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tellaro_query_language-0.1.0.dist-info/LICENSE +21 -0
- tellaro_query_language-0.1.0.dist-info/METADATA +401 -0
- tellaro_query_language-0.1.0.dist-info/RECORD +56 -0
- tellaro_query_language-0.1.0.dist-info/WHEEL +4 -0
- tellaro_query_language-0.1.0.dist-info/entry_points.txt +7 -0
- tql/__init__.py +47 -0
- tql/analyzer.py +385 -0
- tql/cache/__init__.py +7 -0
- tql/cache/base.py +25 -0
- tql/cache/memory.py +63 -0
- tql/cache/redis.py +68 -0
- tql/core.py +929 -0
- tql/core_components/README.md +92 -0
- tql/core_components/__init__.py +20 -0
- tql/core_components/file_operations.py +113 -0
- tql/core_components/opensearch_operations.py +869 -0
- tql/core_components/stats_operations.py +200 -0
- tql/core_components/validation_operations.py +599 -0
- tql/evaluator.py +379 -0
- tql/evaluator_components/README.md +131 -0
- tql/evaluator_components/__init__.py +17 -0
- tql/evaluator_components/field_access.py +176 -0
- tql/evaluator_components/special_expressions.py +296 -0
- tql/evaluator_components/value_comparison.py +315 -0
- tql/exceptions.py +160 -0
- tql/geoip_normalizer.py +233 -0
- tql/mutator_analyzer.py +830 -0
- tql/mutators/__init__.py +222 -0
- tql/mutators/base.py +78 -0
- tql/mutators/dns.py +316 -0
- tql/mutators/encoding.py +218 -0
- tql/mutators/geo.py +363 -0
- tql/mutators/list.py +212 -0
- tql/mutators/network.py +163 -0
- tql/mutators/security.py +225 -0
- tql/mutators/string.py +165 -0
- tql/opensearch.py +78 -0
- tql/opensearch_components/README.md +130 -0
- tql/opensearch_components/__init__.py +17 -0
- tql/opensearch_components/field_mapping.py +399 -0
- tql/opensearch_components/lucene_converter.py +305 -0
- tql/opensearch_components/query_converter.py +775 -0
- tql/opensearch_mappings.py +309 -0
- tql/opensearch_stats.py +451 -0
- tql/parser.py +1363 -0
- tql/parser_components/README.md +72 -0
- tql/parser_components/__init__.py +20 -0
- tql/parser_components/ast_builder.py +162 -0
- tql/parser_components/error_analyzer.py +101 -0
- tql/parser_components/field_extractor.py +112 -0
- tql/parser_components/grammar.py +473 -0
- tql/post_processor.py +737 -0
- tql/scripts.py +124 -0
- tql/stats_evaluator.py +444 -0
- tql/stats_transformer.py +184 -0
- tql/validators.py +110 -0
tql/geoip_normalizer.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""GeoIP data normalization for TQL.
|
|
2
|
+
|
|
3
|
+
This module normalizes GeoIP data from different MMDB providers (MaxMind and DB-IP)
|
|
4
|
+
into a consistent format for TQL queries.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GeoIPNormalizer:
    """Normalizes GeoIP data from different MMDB providers to ECS format.

    Follows Elastic Common Schema (ECS) field naming conventions:
    - geo.* fields: https://www.elastic.co/guide/en/ecs/current/ecs-geo.html
    - as.* fields: https://www.elastic.co/guide/en/ecs/current/ecs-as.html

    The 'maxmind' and 'dbip' providers are read through the same record keys
    for geo and AS data, so that extraction lives in shared private helpers.
    DB-IP records additionally contribute a few custom (non-ECS) top-level
    fields: ``is_eu``, ``weather_code`` and ``county_name``.
    """

    # Providers that normalize() knows how to read; anything else yields None.
    _SUPPORTED_PROVIDERS = ("maxmind", "dbip")

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return ``value`` when it is a dict, otherwise an empty dict.

        Lets callers chain ``.get`` on sub-records without crashing when a key
        is present but holds ``None`` or some other non-dict value.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _english_name(record: Dict[str, Any]) -> Any:
        """Return ``record["names"]["en"]``, or None when it is unavailable."""
        return GeoIPNormalizer._as_dict(record.get("names")).get("en")

    @staticmethod
    def _extract_geo(raw_data: Dict[str, Any]) -> Dict[str, Any]:
        """Collect the ECS ``geo.*`` fields; missing values are stored as None.

        Keys are inserted in a fixed order so the cleaned result keeps a
        stable key order regardless of which sections are present.
        """
        section = GeoIPNormalizer._as_dict
        name_of = GeoIPNormalizer._english_name
        geo: Dict[str, Any] = {}

        # geo.country_* fields (ECS)
        country = section(raw_data.get("country"))
        geo["country_iso_code"] = country.get("iso_code")
        geo["country_name"] = name_of(country)

        # geo.city_name (ECS)
        geo["city_name"] = name_of(section(raw_data.get("city")))

        # geo.postal_code (ECS)
        geo["postal_code"] = section(raw_data.get("postal")).get("code")

        # geo.location (ECS) - only emitted when both coordinates are known
        location = section(raw_data.get("location"))
        latitude = location.get("latitude")
        longitude = location.get("longitude")
        if latitude is not None and longitude is not None:
            geo["location"] = {"lat": latitude, "lon": longitude}
        geo["timezone"] = location.get("time_zone")

        # geo.region_* fields (ECS) - the first subdivision is the state/province
        subdivisions = raw_data.get("subdivisions")
        if isinstance(subdivisions, (list, tuple)) and subdivisions:
            region = section(subdivisions[0])
            geo["region_name"] = name_of(region)
            geo["region_iso_code"] = region.get("iso_code")

        # geo.continent_* fields (ECS)
        continent = section(raw_data.get("continent"))
        geo["continent_code"] = continent.get("code")
        geo["continent_name"] = name_of(continent)

        return geo

    @staticmethod
    def _extract_as(
        raw_data: Dict[str, Any], traits: Dict[str, Any], use_trait_organization: bool
    ) -> Dict[str, Any]:
        """Collect the ECS ``as.*`` fields from top-level keys and ``traits``.

        Top-level ASN keys are read first; truthy values found in ``traits``
        then take precedence over them.
        """
        as_info: Dict[str, Any] = {}

        # Top-level keys (ASN databases or merged data)
        if "autonomous_system_number" in raw_data:
            as_info["number"] = raw_data["autonomous_system_number"]
        if "autonomous_system_organization" in raw_data:
            as_info["organization"] = {"name": raw_data["autonomous_system_organization"]}

        # Traits override the top-level values when they carry data
        if traits.get("autonomous_system_number"):
            as_info["number"] = traits["autonomous_system_number"]
        if traits.get("autonomous_system_organization"):
            as_info["organization"] = {"name": traits["autonomous_system_organization"]}
        if traits.get("isp"):
            # ISP is not part of standard ECS, store it inside organization
            as_info.setdefault("organization", {})["isp"] = traits["isp"]
        if use_trait_organization and traits.get("organization"):
            # NOTE(review): this replaces any AS organization name set above
            # rather than storing the trait separately - confirm that callers
            # expect as.organization.name to hold traits.organization.
            as_info.setdefault("organization", {})["name"] = traits["organization"]

        return as_info

    @staticmethod
    def _extract_dbip_extras(raw_data: Dict[str, Any]) -> Dict[str, Any]:
        """Collect the custom (non-ECS) top-level fields only DB-IP provides."""
        section = GeoIPNormalizer._as_dict
        extras: Dict[str, Any] = {}

        # EU membership flag; False is meaningful, so only None is skipped
        country = section(raw_data.get("country"))
        if country.get("is_in_european_union") is not None:
            extras["is_eu"] = country["is_in_european_union"]

        # Weather code is not part of ECS, store as custom field
        location = section(raw_data.get("location"))
        if location.get("weather_code"):
            extras["weather_code"] = location["weather_code"]

        # A second subdivision (county), when present, is kept as custom field
        subdivisions = raw_data.get("subdivisions")
        if isinstance(subdivisions, (list, tuple)) and len(subdivisions) > 1:
            county_name = GeoIPNormalizer._english_name(section(subdivisions[1]))
            if county_name:
                extras["county_name"] = county_name

        return extras

    @staticmethod
    def normalize(
        raw_data: Optional[Dict[str, Any]], provider: str, mmdb_type: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Normalize GeoIP data to ECS-compliant format.

        Sub-records that are present but not dicts (for example ``None``) are
        treated as missing instead of raising.

        Args:
            raw_data: Raw data from MMDB lookup
            provider: Either 'maxmind' or 'dbip'
            mmdb_type: Full type identifier (e.g., 'dbip_pro', 'maxmind_lite');
                when truthy it is added to the 'geo' and 'as' sub-dicts

        Returns:
            Normalized data dictionary following ECS conventions, nested as
            ``{"geo": {...}, "as": {...}}`` plus any custom top-level fields,
            or None when the input is empty, the provider is not recognised,
            or nothing could be extracted
        """
        if not raw_data:
            return None
        if provider not in GeoIPNormalizer._SUPPORTED_PROVIDERS:
            return None

        is_dbip = provider == "dbip"
        traits = GeoIPNormalizer._as_dict(raw_data.get("traits"))
        normalized: Dict[str, Any] = {}

        # Custom top-level fields first, keeping the established key order
        if is_dbip:
            normalized.update(GeoIPNormalizer._extract_dbip_extras(raw_data))
        for trait_key in ("connection_type", "user_type"):
            if traits.get(trait_key):
                normalized[trait_key] = traits[trait_key]

        # Nested geo structure allows access like geo.country_iso_code in queries
        geo_clean = {
            key: value
            for key, value in GeoIPNormalizer._extract_geo(raw_data).items()
            if value is not None
        }
        if geo_clean:  # Only add if there's data
            if mmdb_type:
                geo_clean["mmdb_type"] = mmdb_type
            normalized["geo"] = geo_clean

        # Drop None values from the AS data, including inside organization
        as_clean: Dict[str, Any] = {}
        as_info = GeoIPNormalizer._extract_as(raw_data, traits, use_trait_organization=is_dbip)
        for key, value in as_info.items():
            if value is None:
                continue
            if key == "organization":
                value = {ok: ov for ok, ov in value.items() if ov is not None}
                if not value:
                    continue
            as_clean[key] = value
        if as_clean:  # Only add if there's data
            if mmdb_type:
                as_clean["mmdb_type"] = mmdb_type
            normalized["as"] = as_clean

        # Return None if nothing was extracted
        return normalized if normalized else None

    @staticmethod
    def merge_data(primary: Optional[Dict[str, Any]], *additional: Dict[str, Any]) -> Dict[str, Any]:
        """Merge data from multiple MMDB files.

        The merge is shallow: only top-level keys are compared, and the first
        source that defines a key wins. ``primary`` is copied, not mutated.

        Args:
            primary: Primary data source (usually city data); a falsy value
                is treated as an empty dictionary
            *additional: Additional data sources (ASN, country, etc.); falsy
                entries are skipped

        Returns:
            Merged dictionary with all available data
        """
        result: Dict[str, Any] = dict(primary) if primary else {}

        for data in additional:
            if not data:
                continue
            # Don't overwrite existing data, only add missing fields
            for key, value in data.items():
                result.setdefault(key, value)

        return result
|