subcat 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- subcat/__init__.py +5 -0
- subcat/config.py +36 -0
- subcat/detector.py +213 -0
- subcat/fingerprints.json +80611 -0
- subcat/logger.py +110 -0
- subcat/modules/alienvault.py +90 -0
- subcat/modules/anubis.py +36 -0
- subcat/modules/bevigil.py +40 -0
- subcat/modules/binaryedge.py +107 -0
- subcat/modules/censys.py +116 -0
- subcat/modules/certspotter.py +29 -0
- subcat/modules/chaos.py +40 -0
- subcat/modules/ctrsh.py +34 -0
- subcat/modules/digitalyama.py +48 -0
- subcat/modules/dnsarchive.py +44 -0
- subcat/modules/dnsdumpster.py +47 -0
- subcat/modules/hackertarget.py +72 -0
- subcat/modules/netlas.py +74 -0
- subcat/modules/securitytrails.py +107 -0
- subcat/modules/shodan.py +93 -0
- subcat/modules/threatcrowd.py +83 -0
- subcat/modules/urlscan.py +73 -0
- subcat/modules/virustotal.py +114 -0
- subcat/modules/wayback.py +45 -0
- subcat/navigator.py +207 -0
- subcat/subcat.py +618 -0
- subcat-1.3.1.dist-info/METADATA +278 -0
- subcat-1.3.1.dist-info/RECORD +31 -0
- subcat-1.3.1.dist-info/WHEEL +5 -0
- subcat-1.3.1.dist-info/entry_points.txt +2 -0
- subcat-1.3.1.dist-info/top_level.txt +1 -0
subcat/__init__.py
ADDED
subcat/config.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Config:
    """Reader for per-module settings stored in a YAML configuration file.

    The file is expected to contain a top-level mapping; ``read(module)``
    returns the value stored under the ``module`` key.
    """

    def __init__(self, logger=None, config: str = 'config.yaml'):
        """Resolve the configuration path and check that it exists.

        Args:
            logger: Optional object exposing ``error(message)``. When it is
                falsy, problems are reported with ``print`` instead.
            config: Path to the YAML file. A relative path is resolved against
                the directory that contains this module, not the current
                working directory.
        """
        self.logger = logger
        if os.path.isabs(config):
            self.config = config
        else:
            dir_path = os.path.dirname(os.path.realpath(__file__))
            self.config = os.path.join(dir_path, config)

        if not os.path.exists(self.config):
            self._report(f"Config file not found: {self.config}")
            # A missing file is remembered as None so read() can return early
            # instead of failing (and logging) on every call.
            self.config = None

    def _report(self, message: str) -> None:
        """Send an error message to the logger, or to stdout without one."""
        if self.logger:
            self.logger.error(message)
        else:
            print(message)

    def read(self, module: str):
        """Return the settings stored under ``module``.

        Args:
            module: Top-level key to look up in the YAML document.

        Returns:
            The value stored under ``module``, or ``False`` when no usable
            config file is available, the file cannot be read or parsed, the
            document is empty or not a mapping, or the key is absent.
        """
        if not self.config:
            return False
        try:
            # Explicit encoding so the result does not depend on the locale.
            with open(self.config, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except Exception as e:
            # Best effort by design: any I/O or parse problem means "no settings".
            self._report(f"Failed to read config file: {e}")
            return False

        if data is None:
            # An empty document is a valid config with nothing configured.
            return False
        if not isinstance(data, dict):
            self._report(
                "Failed to read config file: expected a mapping at the top "
                f"level, got {type(data).__name__}"
            )
            return False
        # Return the module data if present, else False.
        return data.get(module, False)
|
subcat/detector.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import socket
|
|
3
|
+
import ipaddress
|
|
4
|
+
import os
|
|
5
|
+
import json
|
|
6
|
+
import concurrent.futures
|
|
7
|
+
if __package__:
|
|
8
|
+
from .navigator import Navigator
|
|
9
|
+
else:
|
|
10
|
+
from navigator import Navigator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Detector:
    """Detect technologies behind a host from fingerprint rules.

    ``self.fingerprints`` maps a technology name to a rule dict. The rule keys
    consulted are ``headers``, ``html``, ``meta``, ``script``, ``tls``,
    ``cname`` (regex patterns, each a string or a list of strings) and
    ``implies`` (a technology name or a list of names).
    """

    # Extracts the src attribute of <script> tags; shared by all "script" rules.
    _SCRIPT_SRC_RE = re.compile(r'<script[^>]+src=["\'](.*?)["\']', re.IGNORECASE)

    def __init__(self, logger=None):
        """Store the optional logger and load the bundled fingerprints.

        Args:
            logger: Optional object exposing ``error`` and ``debug`` methods.
        """
        self.logger = logger
        self.fingerprints = self.load_fingerprints()
        # AWS ranges are fetched lazily and reused across detect() calls.
        self._aws_ranges = None

    def _debug(self, message: str) -> None:
        """Emit a debug message when a logger is configured."""
        if self.logger:
            self.logger.debug(message)

    def load_fingerprints(self) -> dict:
        """Load fingerprints from fingerprints.json located next to this module.

        Returns:
            The value of the document's ``apps`` key, or an empty dict when
            the key is absent or the file cannot be read or parsed.
        """
        fingerprints_file = os.path.join(os.path.dirname(__file__), 'fingerprints.json')
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(fingerprints_file, encoding='utf-8') as f:
                cached = json.load(f)
            return cached.get('apps', {})
        except Exception as e:
            if self.logger:
                self.logger.error(f"Failed to load fingerprints: {e}")
            return {}

    def get_tls_certificate(self, hostname, port=443, timeout=10):
        """Fetch the peer certificate presented by ``hostname:port``.

        Args:
            hostname: Host to connect to; also used for SNI and verification.
            port: TCP port of the TLS service.
            timeout: Seconds to wait for the connection and handshake, so an
                unresponsive host cannot block the caller indefinitely.

        Returns:
            The certificate dict produced by ``SSLSocket.getpeercert()``.

        Raises:
            OSError: On connection failure or timeout; this includes
                ``ssl.SSLError`` when the handshake or verification fails.
        """
        import ssl
        context = ssl.create_default_context()
        # Context managers close the socket even when the handshake raises.
        with socket.create_connection((hostname, port), timeout=timeout) as conn:
            with context.wrap_socket(conn, server_hostname=hostname) as sock:
                return sock.getpeercert()

    def extract_tls_info(self, cert):
        """Flatten the subject and issuer values of a certificate into text.

        Args:
            cert: Certificate dict whose ``subject`` and ``issuer`` entries
                are sequences of sequences of ``(key, value)`` pairs.

        Returns:
            All subject values followed by all issuer values, separated by
            single spaces; an empty string when both are missing.
        """
        groups = list(cert.get("subject", [])) + list(cert.get("issuer", []))
        joined = " ".join(" ".join(val for _key, val in group) for group in groups)
        return joined.strip()

    def get_cname(self, target):
        """Return the alias list reported by ``socket.gethostbyname_ex``.

        Returns:
            The alias names for ``target``, or an empty list when the lookup
            fails for any reason.
        """
        try:
            _hostname, aliaslist, _addresses = socket.gethostbyname_ex(target)
            return aliaslist
        except Exception:
            # Best effort: an unresolvable name simply has no aliases.
            return []

    def load_aws_ranges(self, url: str = "https://ip-ranges.amazonaws.com/ip-ranges.json") -> dict:
        """Download the AWS IP ranges document through ``Navigator``.

        Args:
            url: Location of the JSON document.

        Returns:
            Whatever ``Navigator.request`` returns for a JSON response, or an
            empty dict when the request raises.
        """
        try:
            with Navigator(debug=self.logger is not None, logger=self.logger) as nav:
                return nav.request(url, method="GET", response_type="json")
        except Exception as e:
            self._debug(f"Error loading AWS ranges: {e}")
            return {}

    def _get_aws_ranges(self) -> dict:
        """Return the AWS ranges, downloading them only until one fetch succeeds."""
        cached = getattr(self, "_aws_ranges", None)
        if cached:
            return cached
        ranges = self.load_aws_ranges()
        if ranges:
            # Only successful downloads are cached so failures are retried.
            self._aws_ranges = ranges
        return ranges

    def is_ip_in_aws(self, ip, aws_ranges) -> bool:
        """Tell whether ``ip`` falls inside any network listed in ``aws_ranges``.

        IPv4 addresses are checked against ``prefixes[*].ip_prefix`` and IPv6
        addresses against ``ipv6_prefixes[*].ipv6_prefix``.

        Args:
            ip: Address to test, as text.
            aws_ranges: Parsed AWS ip-ranges document.

        Returns:
            ``True`` on the first containing network; ``False`` otherwise,
            including when ``ip`` is not a valid address or ``aws_ranges`` is
            not a dict.
        """
        try:
            ip_obj = ipaddress.ip_address(ip)
        except (ValueError, TypeError):
            return False
        if not isinstance(aws_ranges, dict):
            return False

        if ip_obj.version == 4:
            list_key, prefix_key = "prefixes", "ip_prefix"
        else:
            list_key, prefix_key = "ipv6_prefixes", "ipv6_prefix"

        for entry in aws_ranges.get(list_key) or []:
            try:
                network = ipaddress.ip_network(entry[prefix_key])
            except (KeyError, TypeError, ValueError):
                # Skip a malformed entry rather than abandoning the whole scan.
                continue
            if ip_obj in network:
                return True
        return False

    @staticmethod
    def _as_list(value) -> list:
        """Normalise a rule value: a string becomes a one-item list, a list is
        returned as is, and anything else yields an empty list."""
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            return value
        return []

    def _matches(self, patterns, texts) -> bool:
        """Return True when any pattern matches (case-insensitively) any text.

        Patterns that are not valid Python regular expressions are skipped so
        that one bad fingerprint cannot abort detection.
        """
        for pattern in self._as_list(patterns):
            try:
                regex = re.compile(pattern, re.IGNORECASE)
            except (re.error, TypeError) as e:
                self._debug(f"Skipping invalid fingerprint pattern {pattern!r}: {e}")
                continue
            if any(regex.search(text) for text in texts):
                return True
        return False

    def _rules_match(self, rules, headers, page_content, script_srcs, tls_info, cname_records) -> bool:
        """Evaluate one technology's rules against the collected evidence.

        Rule kinds are tried in the order headers, html, meta, script, tls,
        cname; the first match wins.
        """
        if not isinstance(rules, dict):
            return False

        header_rules = rules.get("headers")
        if isinstance(header_rules, dict):
            for name, patterns in header_rules.items():
                value = headers.get(name.lower(), "")
                if value and self._matches(patterns, [value]):
                    return True

        if page_content:
            # "meta" patterns are matched against the raw page text, like "html".
            if self._matches(rules.get("html"), [page_content]):
                return True
            if self._matches(rules.get("meta"), [page_content]):
                return True
            if self._matches(rules.get("script"), script_srcs):
                return True

        if tls_info and self._matches(rules.get("tls"), [tls_info]):
            return True

        return self._matches(rules.get("cname"), cname_records)

    def detect(self, domain: str, response) -> list:
        """Detect technologies for ``domain`` using a single HTTP response.

        Evidence (page text, headers, script sources, TLS certificate text and
        DNS aliases) is gathered once and then checked against every
        fingerprint. Matching runs sequentially: it is regex work that holds
        the interpreter lock, and a plain loop keeps the result order stable.

        Args:
            domain: Host name used for the TLS, alias and address lookups.
            response: Object exposing ``text`` and ``headers``; missing or
                empty attributes are treated as no content.

        Returns:
            Matched technology names in fingerprint order, followed by the
            names they directly imply, followed by "Amazon Web Services" when
            the host's address lies in a published AWS range.
        """
        page_content = getattr(response, "text", None) or ""
        headers = getattr(response, "headers", None)
        headers_normalized = {k.lower(): v for k, v in headers.items()} if headers else {}
        # Extracted once here instead of once per technology with script rules.
        script_srcs = self._SCRIPT_SRC_RE.findall(page_content)

        # Get TLS info once; failure just disables the "tls" rules.
        tls_info = ""
        try:
            tls_info = self.extract_tls_info(self.get_tls_certificate(domain))
        except Exception as e:
            self._debug(f"TLS detection failed for {domain}: {e}")

        # Get CNAME records once.
        cname_records = self.get_cname(domain)

        detected = [
            tech
            for tech, rules in self.fingerprints.items()
            if tech and self._rules_match(
                rules, headers_normalized, page_content,
                script_srcs, tls_info, cname_records,
            )
        ]

        # Process the "implies" field of directly matched technologies only
        # (one level; implied entries are not expanded further).
        for tech in list(detected):
            rule = self.fingerprints.get(tech)
            if not isinstance(rule, dict):
                continue
            for implied in self._as_list(rule.get("implies")):
                if implied not in detected:
                    detected.append(implied)

        # Extra AWS IP range check.
        try:
            target_ip = socket.gethostbyname(domain)
        except Exception:
            # Best effort: without an address the AWS check is skipped.
            target_ip = None

        if target_ip:
            aws_ranges = self._get_aws_ranges()
            if aws_ranges and self.is_ip_in_aws(target_ip, aws_ranges):
                if "Amazon Web Services" not in detected:
                    detected.append("Amazon Web Services")
        return detected
|