subcat 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
subcat/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .subcat import SubCat
2
+ from .detector import Detector
3
+ from .logger import Logger
4
+ from .navigator import Navigator
5
+ from .config import Config
subcat/config.py ADDED
@@ -0,0 +1,36 @@
1
+ import yaml
2
+ import os
3
+
4
+
5
class Config:
    """Locate a YAML configuration file and expose its per-module sections.

    A relative ``config`` path is resolved against the directory containing
    this source file (not the current working directory); an absolute path is
    used as given. When the resolved file does not exist, the problem is
    reported and ``self.config`` is set to ``None`` so that every later
    ``read`` call returns ``False``.
    """

    def __init__(self, logger=None, config: str = 'config.yaml'):
        """Resolve the configuration path and verify that it exists.

        Args:
            logger: Optional object exposing ``error(message)``. When omitted,
                problems are written with ``print`` instead.
            config: Absolute path, or a path relative to this file's directory.
        """
        self.logger = logger
        if os.path.isabs(config):
            self.config = config
        else:
            dir_path = os.path.dirname(os.path.realpath(__file__))
            self.config = os.path.join(dir_path, config)

        if not os.path.exists(self.config):
            self._report(f"Config file not found: {self.config}")
            # None marks the configuration as unavailable for read().
            self.config = None

    def _report(self, message: str) -> None:
        """Send an error message to the logger, or to stdout without one."""
        if self.logger:
            self.logger.error(message)
        else:
            print(message)

    def read(self, module: str):
        """Return the configuration section stored under ``module``.

        Args:
            module: Top-level key to look up in the YAML document.

        Returns:
            The value stored under ``module``, or ``False`` when the config
            file is unavailable, cannot be read or parsed, is empty, is not a
            mapping at the top level, or does not contain ``module``.
        """
        if not self.config:
            return False
        try:
            # Explicit encoding: without it the file is decoded with the
            # platform's locale encoding, which is not UTF-8 everywhere.
            with open(self.config, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except Exception as e:
            # Best effort: any I/O or parse failure means "no configuration".
            self._report(f"Failed to read config file: {e}")
            return False

        if data is None:
            # An empty document is valid YAML; it simply has no sections.
            return False
        if not isinstance(data, dict):
            self._report(
                "Failed to read config file: expected a mapping at the top "
                f"level, got {type(data).__name__}"
            )
            return False
        return data.get(module, False)
subcat/detector.py ADDED
@@ -0,0 +1,213 @@
1
+ import re
2
+ import socket
3
+ import ipaddress
4
+ import os
5
+ import json
6
+ import concurrent.futures
7
+ if __package__:
8
+ from .navigator import Navigator
9
+ else:
10
+ from navigator import Navigator
11
+
12
+
13
class Detector:
    """Detect technologies used by a domain by matching fingerprint rules.

    Fingerprints are read from the ``apps`` mapping of ``fingerprints.json``
    (located next to this file). Each entry maps a technology name to rules
    that may contain ``headers``, ``html``, ``meta``, ``script``, ``tls`` and
    ``cname`` regular expressions, plus an optional ``implies`` list.
    """

    def __init__(self, logger=None):
        """Store the optional logger and load the fingerprint database.

        Args:
            logger: Optional object exposing ``error`` and ``debug`` methods.
        """
        self.logger = logger
        self.fingerprints = self.load_fingerprints()

    def load_fingerprints(self) -> dict:
        """
        Load fingerprints from the fingerprints.json file located in the same directory.

        Returns:
            The ``apps`` mapping of the file, or ``{}`` if the file cannot be
            read or parsed (the failure is logged when a logger is set).
        """
        fingerprints_file = os.path.join(os.path.dirname(__file__), 'fingerprints.json')
        try:
            # Explicit encoding so parsing does not depend on the locale.
            with open(fingerprints_file, encoding='utf-8') as f:
                cached = json.load(f)
            return cached.get('apps', {})
        except Exception as e:
            if self.logger:
                self.logger.error(f"Failed to load fingerprints: {e}")
            return {}

    def get_tls_certificate(self, hostname, port=443, timeout=10.0):
        """Fetch the peer certificate presented by ``hostname:port``.

        Args:
            hostname: Host to connect to; also used for SNI and verification.
            port: TCP port of the TLS service.
            timeout: Seconds allowed for the connection and the handshake, so
                an unresponsive host cannot block the caller indefinitely.

        Returns:
            The certificate as returned by ``SSLSocket.getpeercert()``.

        Raises:
            OSError: On connection, timeout or TLS errors (``ssl.SSLError``
                is a subclass of ``OSError``).
        """
        import ssl
        context = ssl.create_default_context()
        # Context managers close the sockets on every exit path.
        with socket.create_connection((hostname, port), timeout=timeout) as conn:
            with context.wrap_socket(conn, server_hostname=hostname) as sock:
                return sock.getpeercert()

    def extract_tls_info(self, cert):
        """Flatten a certificate's subject and issuer values into one string.

        Args:
            cert: Mapping with optional ``subject`` and ``issuer`` entries,
                each an iterable of iterables of ``(key, value)`` pairs.

        Returns:
            The values (keys are dropped) separated by spaces, subject first.
        """
        parts = []
        for field in ("subject", "issuer"):
            for rdn in cert.get(field, []):
                parts.append(" ".join(val for key, val in rdn))
        return " ".join(parts).strip()

    def get_cname(self, target):
        """Return the other host names the resolver reports for ``target``.

        ``socket.gethostbyname_ex`` returns ``(primary name, aliases,
        addresses)``. The aliases are returned as before; in addition, the
        primary name is appended when it differs from ``target``, because for
        an aliased (CNAME'd) host the name it points to is reported there
        rather than in the alias list (resolver dependent).

        Returns:
            A list of host names, or ``[]`` if resolution fails.
        """
        try:
            canonical, aliaslist, _ = socket.gethostbyname_ex(target)
            names = list(aliaslist)
            same_as_target = (
                canonical.rstrip(".").lower() == str(target).rstrip(".").lower()
            )
            if canonical and not same_as_target and canonical not in names:
                names.append(canonical)
            return names
        except Exception:
            return []

    def load_aws_ranges(self, url: str = "https://ip-ranges.amazonaws.com/ip-ranges.json") -> dict:
        """Download the AWS IP ranges document.

        Args:
            url: Location of the JSON document.

        Returns:
            Whatever ``Navigator.request`` returns for a JSON GET of ``url``,
            or ``{}`` if the request raises.
        """
        try:
            with Navigator(debug=self.logger is not None, logger=self.logger) as nav:
                return nav.request(url, method="GET", response_type="json")
        except Exception as e:
            if self.logger:
                self.logger.debug(f"Error loading AWS ranges: {e}")
            return {}

    def _get_aws_ranges(self) -> dict:
        """Return the AWS ranges, downloading them at most once per instance.

        Only a non-empty result is kept, so a failed download is retried on
        the next call. ``getattr`` is used because the cache attribute is
        created lazily here rather than in ``__init__``.
        """
        cached = getattr(self, "_aws_ranges", None)
        if cached:
            return cached
        ranges = self.load_aws_ranges()
        if ranges:
            self._aws_ranges = ranges
        return ranges

    def is_ip_in_aws(self, ip, aws_ranges) -> bool:
        """Tell whether ``ip`` falls inside one of the given AWS prefixes.

        IPv4 addresses are checked against ``prefixes``/``ip_prefix`` and
        IPv6 addresses against ``ipv6_prefixes``/``ipv6_prefix``. Malformed
        entries are skipped instead of aborting the whole scan.

        Args:
            ip: Address to test (anything ``ipaddress.ip_address`` accepts).
            aws_ranges: Parsed AWS ranges document.

        Returns:
            ``True`` if a prefix contains ``ip``; ``False`` otherwise,
            including when ``ip`` or ``aws_ranges`` is unusable.
        """
        try:
            ip_obj = ipaddress.ip_address(ip)
        except ValueError:
            return False
        if not isinstance(aws_ranges, dict):
            return False

        if ip_obj.version == 4:
            list_key, prefix_key = "prefixes", "ip_prefix"
        else:
            list_key, prefix_key = "ipv6_prefixes", "ipv6_prefix"

        entries = aws_ranges.get(list_key) or []
        if not isinstance(entries, (list, tuple)):
            return False
        for entry in entries:
            try:
                network = ipaddress.ip_network(entry[prefix_key])
            except (KeyError, IndexError, TypeError, ValueError):
                # One bad entry must not hide a later matching prefix.
                continue
            if ip_obj in network:
                return True
        return False

    @staticmethod
    def _as_patterns(value) -> list:
        """Normalise a rule value (list, string or anything else) to a list."""
        if isinstance(value, list):
            return value
        if isinstance(value, str):
            return [value]
        return []

    @staticmethod
    def _any_match(patterns, texts) -> bool:
        """Return True if any pattern matches any text, ignoring case."""
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in patterns
            for text in texts
        )

    def _rules_match(self, rules, headers, page_content, script_srcs,
                     tls_info, cname_records) -> bool:
        """Return True if any rule of one fingerprint matches the evidence.

        ``headers`` must have lower-cased keys. ``html`` and ``meta`` rules
        are both matched against the whole page; ``script`` rules against the
        extracted ``src`` values.
        """
        if not isinstance(rules, dict):
            return False

        header_rules = rules.get("headers")
        if isinstance(header_rules, dict):
            for header, patterns in header_rules.items():
                header_value = headers.get(header.lower(), "")
                if header_value and self._any_match(
                        self._as_patterns(patterns), [header_value]):
                    return True

        if page_content:
            for key in ("html", "meta"):
                if self._any_match(self._as_patterns(rules.get(key)), [page_content]):
                    return True
            if self._any_match(self._as_patterns(rules.get("script")), script_srcs):
                return True

        if tls_info and self._any_match(self._as_patterns(rules.get("tls")), [tls_info]):
            return True

        return self._any_match(self._as_patterns(rules.get("cname")), cname_records)

    def detect(self, domain: str, response) -> list:
        """
        Detect technologies for the given domain using a single Navigator response.

        Fingerprints are checked on a thread pool, but results are collected
        in fingerprint order so the returned list is deterministic.

        Args:
            domain: Host name used for the TLS, CNAME and IP lookups.
            response: Object exposing ``text`` and ``headers``.

        Returns:
            Names of matched technologies in fingerprint order, followed by
            the technologies they imply and, when the domain's IPv4 address
            lies in an AWS range, "Amazon Web Services".
        """
        detected = []
        page_content = response.text
        headers = response.headers
        headers_normalized = {k.lower(): v for k, v in headers.items()} if headers else {}

        # Get TLS info once; failure only disables the TLS rules.
        tls_info = ""
        try:
            cert = self.get_tls_certificate(domain)
            tls_info = self.extract_tls_info(cert)
        except Exception as e:
            if self.logger:
                self.logger.debug(f"TLS detection failed for {domain}: {e}")

        # Get CNAME records once.
        cname_records = self.get_cname(domain)

        # Extract script sources once; they are the same for every fingerprint.
        script_srcs = []
        if page_content:
            script_srcs = re.findall(
                r'<script[^>]+src=["\'](.*?)["\']', page_content, re.IGNORECASE)

        def check_tech(tech, rules):
            """Return ``tech`` if its rules match, otherwise ``None``."""
            try:
                matched = self._rules_match(
                    rules, headers_normalized, page_content,
                    script_srcs, tls_info, cname_records)
            except re.error as e:
                # An invalid pattern disables that fingerprint only.
                if self.logger:
                    self.logger.debug(f"Invalid pattern for {tech}: {e}")
                return None
            return tech if matched else None

        # Run the checks concurrently, then read the futures in submission
        # order (not completion order) to keep the output stable.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(check_tech, tech, rules)
                for tech, rules in self.fingerprints.items()
            ]
            for future in futures:
                result = future.result()
                if result and result not in detected:
                    detected.append(result)

        # Process "implies" for the directly matched technologies only.
        for tech in detected.copy():
            rule = self.fingerprints.get(tech)
            if not isinstance(rule, dict) or "implies" not in rule:
                continue
            implied = rule["implies"]
            if isinstance(implied, str):
                implied = [implied]
            elif not isinstance(implied, list):
                implied = []
            for impl in implied:
                if impl not in detected:
                    detected.append(impl)

        # Extra AWS IP range check; skipped when AWS was already detected,
        # since the outcome could not change the result.
        if "Amazon Web Services" not in detected:
            try:
                target_ip = socket.gethostbyname(domain)
            except Exception:
                target_ip = None
            if target_ip:
                aws_ranges = self._get_aws_ranges()
                if aws_ranges and self.is_ip_in_aws(target_ip, aws_ranges):
                    detected.append("Amazon Web Services")
        return detected