subprober 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- subprober/__init__.py +14 -0
- subprober/cli/__init__.py +0 -0
- subprober/cli/cli.py +209 -0
- subprober/dnsclient/__init__.py +0 -0
- subprober/dnsclient/dnsclient.py +321 -0
- subprober/hash/__init__.py +0 -0
- subprober/hash/hash.py +52 -0
- subprober/headless/__init__.py +0 -0
- subprober/headless/headless.py +554 -0
- subprober/hmap/__init__.py +3 -0
- subprober/hmap/hmap.py +240 -0
- subprober/httpclient/__init__.py +0 -0
- subprober/httpclient/httpclient.py +772 -0
- subprober/httpfilters/__init__.py +0 -0
- subprober/httpfilters/httpfilters.py +664 -0
- subprober/jarmscanner/__init__.py +0 -0
- subprober/jarmscanner/jarmscanner.py +217 -0
- subprober/progresslogger/__init__.py +0 -0
- subprober/progresslogger/progresslogger.py +32 -0
- subprober/pyrunner/__init__.py +0 -0
- subprober/pyrunner/pyrunner.py +1001 -0
- subprober/resparser/__init__.py +0 -0
- subprober/resparser/resparser.py +401 -0
- subprober/settings/__init__.py +0 -0
- subprober/settings/settings.py +212 -0
- subprober/subprober.py +40 -0
- subprober/syncutils/__init__.py +0 -0
- subprober/syncutils/syncutils.py +35 -0
- subprober/urlbuilder/__init__.py +0 -0
- subprober/urlbuilder/urlbuilder.py +166 -0
- subprober/utils/__init__.py +0 -0
- subprober/utils/utils.py +186 -0
- subprober/websocketclient/__init__.py +0 -0
- subprober/websocketclient/websocketclient.py +283 -0
- subprober/workerpool/__init__.py +3 -0
- subprober/workerpool/workerpool.py +194 -0
- subprober-3.1.0.dist-info/METADATA +407 -0
- subprober-3.1.0.dist-info/RECORD +42 -0
- subprober-3.1.0.dist-info/WHEEL +5 -0
- subprober-3.1.0.dist-info/entry_points.txt +2 -0
- subprober-3.1.0.dist-info/licenses/LICENSE +0 -0
- subprober-3.1.0.dist-info/top_level.txt +1 -0
subprober/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from subprober.httpclient.httpclient import RetryableHttp, HttpResponse
|
|
2
|
+
from subprober.pyrunner.pyrunner import PyRunner
|
|
3
|
+
from subprober.settings.settings import Settings
|
|
4
|
+
from subprober.hmap.hmap import HMap
|
|
5
|
+
from subprober.workerpool.workerpool import WorkerPool
|
|
6
|
+
|
|
7
|
+
# Names re-exported at package level, enabling
# `from subprober import RetryableHttp, PyRunner, ...`.
__all__ = [
    "RetryableHttp",
    "HttpResponse",
    "PyRunner",
    "Settings",
    "HMap",
    "WorkerPool",
]
|
|
File without changes
|
subprober/cli/cli.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
from richparser import RichParser
|
|
2
|
+
|
|
3
|
+
class CLI:
    """Build and parse the Subprober command-line interface.

    Registers every CLI flag on a ``RichParser`` instance, grouped by section
    (input, probes, config, miscellaneous, headless, matchers, filters,
    output, rate-limit, optimization, updates, debug), and returns the
    parsed arguments.
    """

    def __init__(self):
        # Created lazily in cli() so constructing a CLI never raises.
        self.parser = None

    def cli(self):
        """Register all arguments and return the parsed argument namespace.

        Returns:
            The namespace produced by ``parser.parse_args()``.

        Exits:
            With status 1 on KeyboardInterrupt or any parser setup failure.
        """
        try:
            parser = RichParser(
                description="Subprober - An essential HTTP multi-purpose Probing Tool for Penetration Testers and Security Researchers with Asynchronous httpx client support ⚡",
                auto=False
            )

            # INPUT Section
            parser.add_argument("input", "-l", "--list", type=str,
                                help="specify the filename containing a list of URLs to probe")
            parser.add_argument("input", "-u", "--url", type=str,
                                help="specify a URL to probe and supports comma-separated values (-u google.com,https://hackerone.com)")
            parser.add_argument("input", "-resume", "--resume", type=str, default=None,
                                help="specify the resume filename generated by the subprober to continue the scan (-resume resume_Djjfos.cfg)")

            # PROBES Section
            parser.add_argument("probes", "-status-code", "--status-code", action="store_true",
                                help="display the status code of the host")
            parser.add_argument("probes", "-title", "--title", action="store_true", help="display the title of host")
            parser.add_argument("probes", "-server", "--server", action="store_true",
                                help="display the server name of the host")
            parser.add_argument("probes", "-wc", "--word-count", action="store_true",
                                help="display the HTTP response word count")
            parser.add_argument("probes", "-lc", "--line-count", action="store_true",
                                help="display the HTTP response line count")
            parser.add_argument("probes", "-cl", "--content-length", action="store_true",
                                help="display the HTTP response content length")
            parser.add_argument("probes", "-location", "--location", action="store_true",
                                help="display the redirected location of the host")
            parser.add_argument("probes", "-application-type", "--application-type", action="store_true",
                                help="display the content type of the host")
            parser.add_argument("probes", "-ip", "--ipaddress", action="store_true", help="display the IPs of the host")
            parser.add_argument("probes", "-cname", "--cname", action="store_true", help="display the CNAMEs of the host")
            parser.add_argument("probes", "-aaaa", "--aaaa-records", action="store_true",
                                help="display the AAAA records of the host")
            parser.add_argument("probes", "-htv", "--http-version", action="store_true",
                                help="display the server supported HTTP version of the host")
            parser.add_argument("probes", "-hrs", "--http-reason", action="store_true",
                                help="display the reason for HTTP connection of the host")
            parser.add_argument("probes", "-jarm", "--jarm-fingerprint", action="store_true",
                                help="display the JARM fingerprint hash of the host")
            parser.add_argument("probes", "-rpt", "--response-time", action="store_true",
                                help="display the response time for the successful request")
            parser.add_argument("probes", "-wss", "--websocket", action="store_true",
                                help="display the server supports websockets")
            parser.add_argument("probes", "-hash", "--hash", type=str,
                                help="display response body in hash format (supported hashes: md5, mmh3, simhash, sha1, sha256, sha512)")
            parser.add_argument("probes", "-dmt", "--display-method", action="store_true",
                                help="display the method of the HTTP request")
            parser.add_argument("probes", "-bp", "--body-preview", action="store_true",
                                help="display the HTTP response body in first n number of characters (default: 100)")
            parser.add_argument("probes", "-body", "--body", type=str, default=None,
                                help="post body to include in the http request and support all post body types (ex: -body 'username=admin&password=password')")
            parser.add_argument("probes", "-resolvers", "--resolvers", type=str, default="8.8.8.8,1.1.1.1",
                                help="custom DNS resolver for dns resolution and supports comma-separated (ex -resolvers 8.8.8.8,1.1.1.1)")

            # CONFIG Section
            parser.add_argument("config", "-dhp", "--disable-http-probe", action="store_true",
                                help="disable subprober from fallback to http scheme (default: disabled)")
            parser.add_argument("config", "-X", "--method", type=str,
                                choices=["get", "post", "head", "put", "delete", "patch", "trace", "connect",
                                         "options"], default="get",
                                help="request methods to probe and get response (supported: get, post, head, put, delete, patch, trace, connect, options) (default: get)")
            parser.add_argument("config", "-H", "--header", action="append",
                                help="add custom headers for probing and -H can be used multiple times to pass multiple header values (ex: -H application/json -H X-Forwarded-Host: 127.0.0.1)")
            parser.add_argument("config", "-ra", "--random-agent", action="store_true",
                                help="enable Random User-Agent to use for probing and applies same to screenshots. (default: subprober/Alpha)")
            parser.add_argument("config", "-proxy", "--proxy", type=str,
                                help="specify a proxy to send the requests through it (ex: http://127.0.0.1:8080)", default=None)
            parser.add_argument("config", "-ar", "--allow-redirect", action="store_true",
                                help="enable following redirections")
            parser.add_argument("config", "-maxr", "--max-redirection", type=int, default=10,
                                help="set max value to follow redirections (default: 10)")
            parser.add_argument("config", "-http2", "--http2", action="store_true",
                                help="enable to request with HTTP/2 support (default: Http/1.1) (info: deprecated)")
            parser.add_argument("config", "-sni", "--sni-hostname", type=str,
                                help="set custom TLS SNI host name for requests.")

            parser.add_argument("config", "-stats", "--stats", action="store_true",
                                help="show the progress stats of the subprober (info: specially for docker environments)")

            # MISCELLANEOUS Section
            parser.add_argument("miscellaneous", "-path", "--path", type=str,
                                help="specify a path or text file of paths for probing and getting results (example: -p admin.php or -p paths.txt)")
            parser.add_argument("miscellaneous", "-port", "--port", type=str,
                                help="set custom port for making HTTP request and default ports are 80,443 based on the url scheme")
            parser.add_argument("miscellaneous", "-tls", "--tls", action="store_true",
                                help="grabs the TLS data for the requested host")

            # HEADLESS Section
            parser.add_argument("headless", "-ss", "--screenshot", action="store_true",
                                help="enable to take screenshots of the page using headless browsers with asynchronous performance")
            # Help text corrected: the actual default below is 10, not 15.
            parser.add_argument("headless", "-st", "--screenshot-timeout", type=int, default=10,
                                help="set a timeout value for taking screenshots (default: 10)")
            parser.add_argument("headless", "-scp", "--system-chrome-path", type=str, default=None,
                                help="specify the executable path of the chromedriver to use system chrome to take screenshots")
            parser.add_argument("headless", "-pdf", "--save-pdf", action="store_true",
                                help="enable to save the screenshot image in the pdf format (default: png)")
            parser.add_argument("headless", "-no-fpg", "--no-full-page", action="store_true",
                                help="disable saving screenshot in full page")
            parser.add_argument("headless", "-icb", "--include-bytes", action="store_true",
                                help="enable to include the screenshot bytes in output when json output enabled")
            parser.add_argument("headless", "-hos", "--headless-options", type=str, default=None,
                                help="set additional chrome headless browser options and supports comma-separated values (-ho \"--start-maximized\")")
            parser.add_argument("headless", "-sid", "--screenshot-idle", default=1, type=int,
                                help="set custom idle time in seconds before taking screenshots (default: 1)")
            parser.add_argument("headless", "-sp", "--screenshot-path", type=str,
                                help="specify a directory path to store screenshot results (default: currentdir/screenshots)")

            # MATCHERS Section
            parser.add_argument("matchers", "-mc", "--match-code", type=str,
                                help="match http response by specified status codes and supports comma-separated values (-mc 200,302)")
            parser.add_argument("matchers", "-mcr", "--match-code-range", type=str,
                                help="match http response by specified status code range and supports single value (-mcr 200-299)")
            parser.add_argument("matchers", "-ms", "--match-string", type=str,
                                help="match http response containing the specified string and supports comma-separated values (-ms admin,login)")
            parser.add_argument("matchers", "-mr", "--match-regex", type=str,
                                help="match http response matching the specified regex and supports comma-separated values (-mr .*admin.*,.*login.*)")
            parser.add_argument("matchers", "-mpt", "--match-path", type=str,
                                help="match http response by URL path and supports comma-separated values (-mpt /admin/wp-ajax.php,/wp-json)")
            parser.add_argument("matchers", "-ml", "--match-length", type=str,
                                help="match http response by specified response length and supports comma-separated values (-ml 1024,2048)")
            parser.add_argument("matchers", "-mlc", "--match-line-count", type=str,
                                help="match http response by specified response line count and supports comma-separated values (-mlc 10,50)")
            parser.add_argument("matchers", "-mwc", "--match-word-count", type=str,
                                help="match http response by specified word count and supports comma-separated values (-mwc 100,500)")
            parser.add_argument("matchers", "-mrt", "--match-response-time", type=float,
                                help="match http response exceeding the specified minimum response time in seconds (-mrt 2.30)")

            # FILTERS Section
            parser.add_argument("filters", "-fc", "--filter-code", type=str,
                                help="filter http response by specified status codes and supports comma-separated values (-fc 404,500)")
            parser.add_argument("filters", "-fcr", "--filter-code-range", type=str,
                                help="filter http response by specified status code range and supports single value (-fcr 400-499)")
            parser.add_argument("filters", "-fs", "--filter-string", type=str,
                                help="filter http response containing the specified string and supports comma-separated values (-fs error,not found)")
            parser.add_argument("filters", "-fr", "--filter-regex", type=str,
                                help="filter http response matching the specified regex and supports comma-separated values (-fr .*admin.*,.*login.*)")
            parser.add_argument("filters", "-fpt", "--filter-path", type=str,
                                help="filter http response by URL path and supports comma-separated values (-fpt /error,404.html)")
            parser.add_argument("filters", "-fl", "--filter-length", type=str,
                                help="filter http response by specified response length and supports comma-separated values (-fl 1024,2048)")
            parser.add_argument("filters", "-flc", "--filter-line-count", type=str,
                                help="filter http response by specified response line count and supports comma-separated values (-flc 10,50)")
            parser.add_argument("filters", "-fwc", "--filter-word-count", type=str,
                                help="filter http response by specified response word count and supports comma-separated values (-fwc 100,500)")
            parser.add_argument("filters", "-frt", "--filter-response-time", type=float,
                                help="filter http response exceeding the specified maximum response time in seconds (-frt 2.30)")

            # OUTPUT Section
            # Help texts corrected: "to enabled to enabled" / "doesn't overrides"
            # were broken user-facing grammar.
            parser.add_argument("output", "-o", "--output", type=str,
                                help="define the output filename to store the results of the probing operation.")
            parser.add_argument("output", "-json", "--json", action="store_true",
                                help="store and display output in JSON format (includes only data from enabled options).")
            parser.add_argument("output", "-rdu", "--redirect-urls", action="store_true",
                                help="display the redirect URLs in the output (requires -json and -ar to be enabled).")
            parser.add_argument("output", "-rdh", "--redirect-history", action="store_true",
                                help="display the full redirect history (requires -json and -ar to be enabled).")
            parser.add_argument("output", "-rsc", "--redirect-status-codes", action="store_true",
                                help="display the status codes for redirections (requires -json and -ar to be enabled).")
            parser.add_argument("output", "-rqh", "--request-headers", action="store_true",
                                help="include request headers in the output (requires -json and -ar to be enabled).")
            parser.add_argument("output", "-rsh", "--response-headers", action="store_true",
                                help="include response headers in the output (requires -json and -ar to be enabled).")
            parser.add_argument("output", "-fo", "--full-output", action="store_true",
                                help="include all available data in the output (requires -json to be enabled and doesn't override websocket,jarm,hashes options).")

            # RATE-LIMIT Section
            parser.add_argument("rate-limit", "-c", "--concurrency", type=int, default=100,
                                help="set the concurrency level for sending http requests (default: 100)")
            # Help text corrected: the actual default below is 150, not 1000.
            parser.add_argument("rate-limit", "-rtl", "--rate-limit", type=int, default=150,
                                help="set a rate limit for sending a maximum number of requests per second (default: 150)")

            # OPTIMIZATION Section
            parser.add_argument("optimization", "-timeout", "--timeout", type=int, default=10,
                                help="set a custom timeout value for sending requests.")
            # Help text corrected: the actual default below is 0.5, not None.
            parser.add_argument("optimization", "-delay", "--delay", type=float, default=0.5,
                                help="set a delay in seconds before sending each request (default: 0.5)")
            parser.add_argument("optimization", "-rts", "--retries", type=int, default=0,
                                help="set a number of retries if a request fails to connect (default: 0)")

            # UPDATES Section
            parser.add_argument("updates", "-up", "--update", action="store_true",
                                help="update subprober to the latest version (uv tool required to be installed)")
            parser.add_argument("updates", "-sup", "--show-updates", action="store_true",
                                help="display the current or latest version of subprober updates")

            # DEBUG Section
            parser.add_argument("debug", "-silent", "--silent", action="store_true",
                                help="enable silent mode to suppress the display of Subprober banner and version information.")
            parser.add_argument("debug", "-verbose", "--verbose", action="store_true",
                                help="enable verbose mode to display error results on the console.")
            parser.add_argument("debug", "-nc", "--no-color", action="store_true",
                                help="enable to display the output without any CLI colors")
            parser.add_argument("debug", "-debug", "--debug", action="store_true", help="debug the subprober workflow")

            self.parser = parser
            return parser.parse_args()
        except KeyboardInterrupt:
            exit(1)
        except Exception as e:
            print(f"Exception occurred in the cli module due to: {e}, {type(e)}")
            exit(1)
|
File without changes
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
import aiodns
|
|
2
|
+
import asyncio
|
|
3
|
+
import socket
|
|
4
|
+
from typing import Optional, List
|
|
5
|
+
from contextlib import asynccontextmanager
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AsyncDnsClient:
    """
    Asynchronous DNS client for domain resolution.

    This client uses aiodns library to perform DNS queries with retry logic.
    Concurrency control is handled by the caller.
    """

    def __init__(
        self,
        nameservers: Optional[List[str]] = None,
        max_retries: int = 3,
        timeout: float = 5.0,
        verbose: bool = False
    ):
        """
        Initialize DNS client.

        Args:
            nameservers: List of DNS servers to use (default: Cloudflare and Google)
            max_retries: Maximum retry attempts per query
            timeout: Query timeout in seconds
            verbose: Enable verbose logging
        """
        self.nameservers = nameservers or ["1.1.1.1", "8.8.8.8"]
        self.max_retries = max_retries
        self.timeout = timeout
        self.verbose = verbose
        # Lazily created shared resolver; see _get_resolver().
        self._resolver: Optional[aiodns.DNSResolver] = None
        # NOTE(review): asyncio.Lock() is constructed here, possibly outside a
        # running loop — fine on Python 3.10+, but binds a loop at construction
        # on older versions. Confirm the supported Python range.
        self._resolver_lock = asyncio.Lock()

    def _log(self, message: str, level: str = "INFO"):
        """Log message if verbose is enabled."""
        if self.verbose:
            print(f"[DNS-{level}] {message}")

    async def _get_resolver(self) -> aiodns.DNSResolver:
        """
        Get or create a shared DNS resolver instance.

        Reuses a single resolver instance to avoid exhausting system inotify watches.
        Thread-safe through async lock.
        """
        # Fast path avoids the lock once the resolver exists; the second check
        # inside the lock guards against concurrent first callers.
        if self._resolver is None:
            async with self._resolver_lock:
                # Double-check after acquiring lock
                if self._resolver is None:
                    try:
                        loop = asyncio.get_running_loop()
                        self._resolver = aiodns.DNSResolver(
                            loop=loop,
                            nameservers=self.nameservers,
                            rotate=True,
                            timeout=self.timeout
                        )
                        self._log(f"DNS resolver created with nameservers: {self.nameservers}")
                    except Exception as e:
                        self._log(f"Failed to create DNS resolver: {e}", "ERROR")
                        raise
        return self._resolver

    def _extract_domain(self, url: str) -> Optional[str]:
        """
        Extract domain from URL.

        Args:
            url: URL or domain string

        Returns:
            Cleaned domain name or None if invalid
        """
        if not url:
            return None

        # Remove protocol
        if "://" in url:
            url = url.split("://", 1)[1]

        # Remove path, query, fragment
        url = url.split("/", 1)[0].split("?", 1)[0].split("#", 1)[0]

        # Remove port
        # NOTE(review): splitting on ":" also truncates bracketed IPv6 hosts
        # (e.g. "[::1]:80") — confirm IPv6 literals are out of scope here.
        url = url.split(":", 1)[0]

        # Remove trailing dot
        url = url.rstrip(".")

        return url.lower() if url else None

    async def _query_with_retry(
        self,
        domain: str,
        record_type: str,
        resolver: aiodns.DNSResolver
    ) -> List[str]:
        """
        Execute DNS query with retry logic.

        Args:
            domain: Domain to query
            record_type: DNS record type (A, AAAA, CNAME, etc.)
            resolver: DNS resolver instance

        Returns:
            List of resolved records
        """
        last_exception = None

        for attempt in range(1, self.max_retries + 1):
            try:
                self._log(f"Querying {domain} ({record_type}) - Attempt {attempt}/{self.max_retries}")

                # Outer wait_for enforces the timeout even if the resolver's
                # own timeout misbehaves.
                results = await asyncio.wait_for(
                    resolver.query(domain, record_type),
                    timeout=self.timeout
                )

                # Parse results based on record type
                if record_type == 'CNAME':
                    records = [results.cname] if hasattr(results, 'cname') else []
                elif record_type in ('A', 'AAAA'):
                    records = [result.host for result in results] if results else []
                elif record_type == 'MX':
                    records = [result.host for result in results] if results else []
                elif record_type == 'TXT':
                    records = [result.text for result in results] if results else []
                elif record_type == 'NS':
                    records = [result.host for result in results] if results else []
                elif record_type == 'SOA':
                    if results:
                        soa = results
                        records = [f"{soa.nsname} {soa.hostmaster}"]
                    else:
                        records = []
                elif record_type == 'PTR':
                    records = [result.host for result in results] if results else []
                else:
                    # Unsupported record types resolve to an empty result, not an error.
                    records = []

                if records:
                    self._log(f"Successfully resolved {domain} ({record_type}): {records}")
                    return records
                else:
                    self._log(f"No records found for {domain} ({record_type})")
                    return []

            except asyncio.TimeoutError:
                last_exception = f"Timeout after {self.timeout}s"
                self._log(f"Timeout for {domain} ({record_type}) on attempt {attempt}", "WARN")

            except aiodns.error.DNSError as e:
                # DNS-specific errors (NXDOMAIN, SERVFAIL, etc.)
                # NXDOMAIN/NODATA are definitive answers, so they short-circuit
                # without retrying; other DNS errors fall through to retry.
                if e.args[0] == aiodns.error.ARES_ENOTFOUND:
                    self._log(f"Domain not found: {domain}", "DEBUG")
                    return []  # No retry for NXDOMAIN
                elif e.args[0] == aiodns.error.ARES_ENODATA:
                    self._log(f"No data for {domain} ({record_type})", "DEBUG")
                    return []  # No retry for NODATA
                last_exception = f"DNS error: {e}"
                self._log(f"DNS error for {domain} ({record_type}): {e}", "WARN")

            except socket.gaierror as e:
                last_exception = f"Socket error: {e}"
                self._log(f"Socket error for {domain} ({record_type}): {e}", "WARN")

            except (KeyboardInterrupt, asyncio.CancelledError):
                # Cancellation must propagate — never swallow it into a retry.
                self._log("Query cancelled by user", "INFO")
                raise

            except Exception as e:
                last_exception = f"Unexpected error: {e}"
                self._log(f"Unexpected error for {domain} ({record_type}): {e}", "ERROR")

            # Exponential backoff between retries
            if attempt < self.max_retries:
                backoff = min(2 ** (attempt - 1), 5)  # Max 5 seconds
                self._log(f"Retrying in {backoff}s...", "DEBUG")
                await asyncio.sleep(backoff)

        # All retries exhausted
        self._log(f"All retries exhausted for {domain} ({record_type}). Last error: {last_exception}", "ERROR")
        return []

    async def resolve(
        self,
        url: str,
        record_type: str = 'A'
    ) -> List[str]:
        """
        Resolve domain with automatic retry and resource management.

        Args:
            url: URL or domain to resolve
            record_type: DNS record type (A, AAAA, CNAME, MX, TXT, NS, SOA, PTR)

        Returns:
            List of resolved records
        """
        domain = self._extract_domain(url)

        if not domain:
            self._log(f"Invalid domain: {url}", "ERROR")
            return []

        try:
            # Get shared resolver instance
            resolver = await self._get_resolver()
            return await self._query_with_retry(domain, record_type, resolver)
        except (KeyboardInterrupt, asyncio.CancelledError):
            return []
        except Exception as e:
            self._log(f"Fatal error resolving {domain}: {e}", "ERROR")
            return []

    async def resolve_many(
        self,
        domains: List[str],
        record_type: str = 'A'
    ) -> dict[str, List[str]]:
        """
        Resolve multiple domains concurrently.

        Note: Concurrency control should be handled by the caller if needed.
        This method will execute all queries concurrently without limits.

        Args:
            domains: List of URLs/domains to resolve
            record_type: DNS record type

        Returns:
            Dictionary mapping domain to resolved records
        """
        tasks = [self.resolve(domain, record_type) for domain in domains]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Any gathered exception is flattened to an empty record list.
        return {
            domain: result if isinstance(result, list) else []
            for domain, result in zip(domains, results)
        }

    async def close(self):
        """
        Clean up DNS resolver resources.

        Should be called when done with the client to properly release resources.
        """
        # Dropping the reference lets the resolver be garbage-collected; a new
        # one is created lazily if the client is used again.
        self._log("Closing DNS client", "INFO")
        self._resolver = None

    async def resolve_with_fallback(
        self,
        url: str,
        record_types: Optional[List[str]] = None
    ) -> dict[str, List[str]]:
        """
        Resolve domain with multiple record types as fallback.

        Args:
            url: URL or domain to resolve
            record_types: List of record types to try (default: A, AAAA)

        Returns:
            Dictionary mapping record type to resolved records
        """
        if record_types is None:
            record_types = ['A', 'AAAA']

        results = {}
        # Record types are queried sequentially; types with no records are
        # omitted from the result dict entirely.
        for record_type in record_types:
            records = await self.resolve(url, record_type)
            if records:
                results[record_type] = records

        return results
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
async def main():
    """Demonstrate AsyncDnsClient: single, batch, fallback, and typed lookups."""
    client = AsyncDnsClient(
        nameservers=["1.1.1.1", "8.8.8.8"],
        max_retries=3,
        timeout=5.0,
        verbose=True,
    )

    # One domain, one record type.
    print("=== Single Resolution ===")
    a_records = await client.resolve("example.com", "A")
    print(f"A records for example.com: {a_records}\n")

    # Several domains resolved concurrently.
    print("=== Multiple Resolutions ===")
    targets = ["google.com", "github.com", "cloudflare.com"]
    batch = await client.resolve_many(targets, "A")
    for host, addresses in batch.items():
        print(f"{host}: {addresses}")

    # Try A records first, then AAAA.
    print("\n=== Resolution with Fallback ===")
    by_type = await client.resolve_with_fallback("google.com", ["A", "AAAA"])
    for rtype, values in by_type.items():
        print(f"{rtype}: {values}")

    # Non-address record types.
    print("\n=== Different Record Types ===")
    mail_hosts = await client.resolve("gmail.com", "MX")
    print(f"MX records for gmail.com: {mail_hosts}")

    name_servers = await client.resolve("google.com", "NS")
    print(f"NS records for google.com: {name_servers}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
File without changes
|
subprober/hash/hash.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import mmh3
|
|
3
|
+
from simhash import Simhash
|
|
4
|
+
from revoltlogger import Logger
|
|
5
|
+
|
|
6
|
+
def tokenize_for_simhash(text: str) -> list[str]:
    """Split *text* into lowercase, whitespace-delimited tokens for Simhash."""
    lowered = text.lower()
    return lowered.split()
|
|
8
|
+
|
|
9
|
+
class HashGen:
    """Generate one or more digests of an HTTP response body.

    Supported algorithms: md5, sha1, sha256, sha512 (via hashlib), mmh3,
    and simhash. Algorithm names are normalized (stripped, lowercased) at
    construction time.
    """

    # Algorithms delegated generically to hashlib.new().
    _HASHLIB_ALGORITHMS = frozenset({"md5", "sha1", "sha256", "sha512"})

    def __init__(self, algorithms: list[str], verbose: bool = False):
        self.algorithms = [alg.strip().lower() for alg in algorithms]
        self.verbose = verbose
        self.logger = Logger()

    async def gen(self, response: str) -> dict[str, str]:
        """Hash *response* with every configured algorithm.

        Declared async for interface compatibility with awaiting callers,
        although it performs no awaits itself.

        Args:
            response: Response body text to hash.

        Returns:
            Mapping of algorithm name to hex/int digest string; empty dict
            if the body cannot be encoded at all.
        """
        hashed_results: dict[str, str] = {}

        try:
            # Encode ONCE (dropping bytes that cannot round-trip through
            # UTF-8) instead of re-encoding per algorithm as before; the
            # decoded form is kept only for simhash tokenization. The byte
            # content fed to each hash is identical to the original
            # encode/decode round-trip.
            data = response.encode("utf-8", errors="ignore")
            processed_response = data.decode("utf-8", errors="ignore")
        except Exception as e:
            if self.verbose:
                self.logger.warn(f"Error processing response encoding for hashing: {e}, {type(e)}")
            return {}

        for alg in self.algorithms:
            try:
                if alg in self._HASHLIB_ALGORITHMS:
                    hashed_results[alg] = hashlib.new(alg, data).hexdigest()
                elif alg == "mmh3":
                    hashed_results[alg] = str(mmh3.hash(data))
                elif alg == "simhash":
                    tokens = tokenize_for_simhash(processed_response)
                    if tokens:
                        hashed_results[alg] = str(Simhash(tokens).value)
                    else:
                        # An empty body yields no tokens; record an empty hash
                        # rather than omitting the key.
                        hashed_results[alg] = ""
                        if self.verbose:
                            self.logger.warn("Simhash received empty tokens for response. Setting empty hash.")
                else:
                    if self.verbose:
                        self.logger.warn(f"Undefined or unsupported hash algorithm requested: '{alg}'")
            except Exception as inner_e:
                # Per-algorithm failures are logged and skipped so one bad
                # algorithm does not lose the others' results.
                self.logger.warn(f"Error generating hash for algorithm '{alg}': {inner_e}, {type(inner_e)}")

        return hashed_results
|
|
File without changes
|