subprober 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. subprober/__init__.py +14 -0
  2. subprober/cli/__init__.py +0 -0
  3. subprober/cli/cli.py +209 -0
  4. subprober/dnsclient/__init__.py +0 -0
  5. subprober/dnsclient/dnsclient.py +321 -0
  6. subprober/hash/__init__.py +0 -0
  7. subprober/hash/hash.py +52 -0
  8. subprober/headless/__init__.py +0 -0
  9. subprober/headless/headless.py +554 -0
  10. subprober/hmap/__init__.py +3 -0
  11. subprober/hmap/hmap.py +240 -0
  12. subprober/httpclient/__init__.py +0 -0
  13. subprober/httpclient/httpclient.py +772 -0
  14. subprober/httpfilters/__init__.py +0 -0
  15. subprober/httpfilters/httpfilters.py +664 -0
  16. subprober/jarmscanner/__init__.py +0 -0
  17. subprober/jarmscanner/jarmscanner.py +217 -0
  18. subprober/progresslogger/__init__.py +0 -0
  19. subprober/progresslogger/progresslogger.py +32 -0
  20. subprober/pyrunner/__init__.py +0 -0
  21. subprober/pyrunner/pyrunner.py +1001 -0
  22. subprober/resparser/__init__.py +0 -0
  23. subprober/resparser/resparser.py +401 -0
  24. subprober/settings/__init__.py +0 -0
  25. subprober/settings/settings.py +212 -0
  26. subprober/subprober.py +40 -0
  27. subprober/syncutils/__init__.py +0 -0
  28. subprober/syncutils/syncutils.py +35 -0
  29. subprober/urlbuilder/__init__.py +0 -0
  30. subprober/urlbuilder/urlbuilder.py +166 -0
  31. subprober/utils/__init__.py +0 -0
  32. subprober/utils/utils.py +186 -0
  33. subprober/websocketclient/__init__.py +0 -0
  34. subprober/websocketclient/websocketclient.py +283 -0
  35. subprober/workerpool/__init__.py +3 -0
  36. subprober/workerpool/workerpool.py +194 -0
  37. subprober-3.1.0.dist-info/METADATA +407 -0
  38. subprober-3.1.0.dist-info/RECORD +42 -0
  39. subprober-3.1.0.dist-info/WHEEL +5 -0
  40. subprober-3.1.0.dist-info/entry_points.txt +2 -0
  41. subprober-3.1.0.dist-info/licenses/LICENSE +0 -0
  42. subprober-3.1.0.dist-info/top_level.txt +1 -0
subprober/__init__.py ADDED
@@ -0,0 +1,14 @@
1
# Public package surface for subprober: re-export the primary entry points so
# callers can simply write `from subprober import PyRunner`, etc.
from subprober.httpclient.httpclient import RetryableHttp, HttpResponse
from subprober.pyrunner.pyrunner import PyRunner
from subprober.settings.settings import Settings
from subprober.hmap.hmap import HMap
from subprober.workerpool.workerpool import WorkerPool

# Explicit public API: the names exported by `from subprober import *`.
__all__ = [
    "RetryableHttp",
    "HttpResponse",
    "PyRunner",
    "Settings",
    "HMap",
    "WorkerPool",
]
File without changes
subprober/cli/cli.py ADDED
@@ -0,0 +1,209 @@
1
+ from richparser import RichParser
2
+
3
class CLI:
    """Command-line interface builder for Subprober.

    Registers every option group on a RichParser instance and returns the
    parsed arguments.  Argument registration is split into one private helper
    per section so each group can be read and extended in isolation; the
    public entry point ``cli()`` keeps its original signature and behavior.

    Fixed here: help texts whose stated defaults disagreed with the actual
    ``default=`` values (-st, -rtl, -delay), the garbled "to enabled to
    enabled" phrasing in the OUTPUT section, and the ``-ho`` example for the
    ``-hos`` flag.
    """

    def __init__(self):
        # Populated with the RichParser instance once cli() has run.
        self.parser = None

    def _add_input_args(self, parser):
        # INPUT Section
        parser.add_argument("input", "-l", "--list", type=str,
                            help="specify the filename containing a list of URLs to probe")
        parser.add_argument("input", "-u", "--url", type=str,
                            help="specify a URL to probe and supports comma-separated values (-u google.com,https://hackerone.com)")
        parser.add_argument("input", "-resume", "--resume", type=str, default=None,
                            help="specify the resume filename generated by the subprober to continue the scan (-resume resume_Djjfos.cfg)")

    def _add_probe_args(self, parser):
        # PROBES Section
        parser.add_argument("probes", "-status-code", "--status-code", action="store_true",
                            help="display the status code of the host")
        parser.add_argument("probes", "-title", "--title", action="store_true", help="display the title of host")
        parser.add_argument("probes", "-server", "--server", action="store_true",
                            help="display the server name of the host")
        parser.add_argument("probes", "-wc", "--word-count", action="store_true",
                            help="display the HTTP response word count")
        parser.add_argument("probes", "-lc", "--line-count", action="store_true",
                            help="display the HTTP response line count")
        parser.add_argument("probes", "-cl", "--content-length", action="store_true",
                            help="display the HTTP response content length")
        parser.add_argument("probes", "-location", "--location", action="store_true",
                            help="display the redirected location of the host")
        parser.add_argument("probes", "-application-type", "--application-type", action="store_true",
                            help="display the content type of the host")
        parser.add_argument("probes", "-ip", "--ipaddress", action="store_true", help="display the IPs of the host")
        parser.add_argument("probes", "-cname", "--cname", action="store_true", help="display the CNAMEs of the host")
        parser.add_argument("probes", "-aaaa", "--aaaa-records", action="store_true",
                            help="display the AAAA records of the host")
        parser.add_argument("probes", "-htv", "--http-version", action="store_true",
                            help="display the server supported HTTP version of the host")
        parser.add_argument("probes", "-hrs", "--http-reason", action="store_true",
                            help="display the reason for HTTP connection of the host")
        parser.add_argument("probes", "-jarm", "--jarm-fingerprint", action="store_true",
                            help="display the JARM fingerprint hash of the host")
        parser.add_argument("probes", "-rpt", "--response-time", action="store_true",
                            help="display the response time for the successful request")
        parser.add_argument("probes", "-wss", "--websocket", action="store_true",
                            help="display the server supports websockets")
        parser.add_argument("probes", "-hash", "--hash", type=str,
                            help="display response body in hash format (supported hashes: md5, mmh3, simhash, sha1, sha256, sha512)")
        parser.add_argument("probes", "-dmt", "--display-method", action="store_true",
                            help="display the method of the HTTP request")
        parser.add_argument("probes", "-bp", "--body-preview", action="store_true",
                            help="display the HTTP response body in first n number of characters (default: 100)")
        parser.add_argument("probes", "-body", "--body", type=str, default=None,
                            help="post body to include in the http request and support all post body types (ex: -body 'username=admin&password=password')")
        parser.add_argument("probes", "-resolvers", "--resolvers", type=str, default="8.8.8.8,1.1.1.1",
                            help="custom DNS resolver for dns resolution and supports comma-separated (ex -resolvers 8.8.8.8,1.1.1.1)")

    def _add_config_args(self, parser):
        # CONFIG Section
        parser.add_argument("config", "-dhp", "--disable-http-probe", action="store_true",
                            help="disable subprober from fallback to http scheme (default: disabled)")
        parser.add_argument("config", "-X", "--method", type=str,
                            choices=["get", "post", "head", "put", "delete", "patch", "trace", "connect",
                                     "options"], default="get",
                            help="request methods to probe and get response (supported: get, post, head, put, delete, patch, trace, connect, options) (default: get)")
        parser.add_argument("config", "-H", "--header", action="append",
                            help="add custom headers for probing and -H can be used multiple times to pass multiple header values (ex: -H application/json -H X-Forwarded-Host: 127.0.0.1)")
        parser.add_argument("config", "-ra", "--random-agent", action="store_true",
                            help="enable Random User-Agent to use for probing and applies same to screenshots. (default: subprober/Alpha)")
        parser.add_argument("config", "-proxy", "--proxy", type=str,
                            help="specify a proxy to send the requests through it (ex: http://127.0.0.1:8080)", default=None)
        parser.add_argument("config", "-ar", "--allow-redirect", action="store_true",
                            help="enable following redirections")
        parser.add_argument("config", "-maxr", "--max-redirection", type=int, default=10,
                            help="set max value to follow redirections (default: 10)")
        parser.add_argument("config", "-http2", "--http2", action="store_true",
                            help="enable to request with HTTP/2 support (default: Http/1.1) (info: deprecated)")
        parser.add_argument("config", "-sni", "--sni-hostname", type=str,
                            help="set custom TLS SNI host name for requests.")
        parser.add_argument("config", "-stats", "--stats", action="store_true",
                            help="show the progress stats of the subprober (info: specially for docker environments)")

    def _add_misc_args(self, parser):
        # MISCELLANEOUS Section
        parser.add_argument("miscellaneous", "-path", "--path", type=str,
                            help="specify a path or text file of paths for probing and getting results (example: -p admin.php or -p paths.txt)")
        parser.add_argument("miscellaneous", "-port", "--port", type=str,
                            help="set custom port for making HTTP request and default ports are 80,443 based on the url scheme")
        parser.add_argument("miscellaneous", "-tls", "--tls", action="store_true",
                            help="grabs the TLS data for the requested host")

    def _add_headless_args(self, parser):
        # HEADLESS Section
        parser.add_argument("headless", "-ss", "--screenshot", action="store_true",
                            help="enable to take screenshots of the page using headless browsers with asynchronous performance")
        # Help text now matches the actual default (was stated as 15).
        parser.add_argument("headless", "-st", "--screenshot-timeout", type=int, default=10,
                            help="set a timeout value for taking screenshots (default: 10)")
        parser.add_argument("headless", "-scp", "--system-chrome-path", type=str, default=None,
                            help="specify the executable path of the chromedriver to use system chrome to take screenshots")
        parser.add_argument("headless", "-pdf", "--save-pdf", action="store_true",
                            help="enable to save the screenshot image in the pdf format (default: png)")
        parser.add_argument("headless", "-no-fpg", "--no-full-page", action="store_true",
                            help="disable saving screenshot in full page")
        parser.add_argument("headless", "-icb", "--include-bytes", action="store_true",
                            help="enable to include the screenshot bytes in output when json output enabled")
        # Example flag corrected from -ho to the actual short option -hos.
        parser.add_argument("headless", "-hos", "--headless-options", type=str, default=None,
                            help="set additional chrome headless browser options and supports comma-separated values (-hos \"--start-maximized\")")
        parser.add_argument("headless", "-sid", "--screenshot-idle", default=1, type=int,
                            help="set custom idle time in seconds before taking screenshots (default: 1)")
        parser.add_argument("headless", "-sp", "--screenshot-path", type=str,
                            help="specify a directory path to store screenshot results (default: currentdir/screenshots)")

    def _add_matcher_args(self, parser):
        # MATCHERS Section
        parser.add_argument("matchers", "-mc", "--match-code", type=str,
                            help="match http response by specified status codes and supports comma-separated values (-mc 200,302)")
        parser.add_argument("matchers", "-mcr", "--match-code-range", type=str,
                            help="match http response by specified status code range and supports single value (-mcr 200-299)")
        parser.add_argument("matchers", "-ms", "--match-string", type=str,
                            help="match http response containing the specified string and supports comma-separated values (-ms admin,login)")
        parser.add_argument("matchers", "-mr", "--match-regex", type=str,
                            help="match http response matching the specified regex and supports comma-separated values (-mr .*admin.*,.*login.*)")
        parser.add_argument("matchers", "-mpt", "--match-path", type=str,
                            help="match http response by URL path and supports comma-separated values (-mpt /admin/wp-ajax.php,/wp-json)")
        parser.add_argument("matchers", "-ml", "--match-length", type=str,
                            help="match http response by specified response length and supports comma-separated values (-ml 1024,2048)")
        parser.add_argument("matchers", "-mlc", "--match-line-count", type=str,
                            help="match http response by specified response line count and supports comma-separated values (-mlc 10,50)")
        parser.add_argument("matchers", "-mwc", "--match-word-count", type=str,
                            help="match http response by specified word count and supports comma-separated values (-mwc 100,500)")
        parser.add_argument("matchers", "-mrt", "--match-response-time", type=float,
                            help="match http response exceeding the specified minimum response time in seconds (-mrt 2.30)")

    def _add_filter_args(self, parser):
        # FILTERS Section
        parser.add_argument("filters", "-fc", "--filter-code", type=str,
                            help="filter http response by specified status codes and supports comma-separated values (-fc 404,500)")
        parser.add_argument("filters", "-fcr", "--filter-code-range", type=str,
                            help="filter http response by specified status code range and supports single value (-fcr 400-499)")
        parser.add_argument("filters", "-fs", "--filter-string", type=str,
                            help="filter http response containing the specified string and supports comma-separated values (-fs error,not found)")
        parser.add_argument("filters", "-fr", "--filter-regex", type=str,
                            help="filter http response matching the specified regex and supports comma-separated values (-fr .*admin.*,.*login.*)")
        parser.add_argument("filters", "-fpt", "--filter-path", type=str,
                            help="filter http response by URL path and supports comma-separated values (-fpt /error,404.html)")
        parser.add_argument("filters", "-fl", "--filter-length", type=str,
                            help="filter http response by specified response length and supports comma-separated values (-fl 1024,2048)")
        parser.add_argument("filters", "-flc", "--filter-line-count", type=str,
                            help="filter http response by specified response line count and supports comma-separated values (-flc 10,50)")
        parser.add_argument("filters", "-fwc", "--filter-word-count", type=str,
                            help="filter http response by specified response word count and supports comma-separated values (-fwc 100,500)")
        parser.add_argument("filters", "-frt", "--filter-response-time", type=float,
                            help="filter http response exceeding the specified maximum response time in seconds (-frt 2.30)")

    def _add_output_args(self, parser):
        # OUTPUT Section (help grammar fixed: "to enabled" -> "to be enabled")
        parser.add_argument("output", "-o", "--output", type=str,
                            help="define the output filename to store the results of the probing operation.")
        parser.add_argument("output", "-json", "--json", action="store_true",
                            help="store and display output in JSON format (includes only data from enabled options).")
        parser.add_argument("output", "-rdu", "--redirect-urls", action="store_true",
                            help="display the redirect URLs in the output (requires -json and -ar to be enabled).")
        parser.add_argument("output", "-rdh", "--redirect-history", action="store_true",
                            help="display the full redirect history (requires -json and -ar to be enabled).")
        parser.add_argument("output", "-rsc", "--redirect-status-codes", action="store_true",
                            help="display the status codes for redirections (requires -json and -ar to be enabled).")
        parser.add_argument("output", "-rqh", "--request-headers", action="store_true",
                            help="include request headers in the output (requires -json and -ar to be enabled).")
        parser.add_argument("output", "-rsh", "--response-headers", action="store_true",
                            help="include response headers in the output (requires -json and -ar to be enabled).")
        parser.add_argument("output", "-fo", "--full-output", action="store_true",
                            help="include all available data in the output (requires -json to be enabled and doesn't override websocket,jarm,hashes options).")

    def _add_rate_limit_args(self, parser):
        # RATE-LIMIT Section
        parser.add_argument("rate-limit", "-c", "--concurrency", type=int, default=100,
                            help="set the concurrency level for sending http requests (default: 100)")
        # Help text now matches the actual default (was stated as 1000).
        parser.add_argument("rate-limit", "-rtl", "--rate-limit", type=int, default=150,
                            help="set a rate limit for sending a maximum number of requests per second (default: 150)")

    def _add_optimization_args(self, parser):
        # OPTIMIZATION Section
        parser.add_argument("optimization", "-timeout", "--timeout", type=int, default=10,
                            help="set a custom timeout value for sending requests.")
        # Help text now matches the actual default (was stated as None).
        parser.add_argument("optimization", "-delay", "--delay", type=float, default=0.5,
                            help="set a delay in seconds before sending each request (default: 0.5)")
        parser.add_argument("optimization", "-rts", "--retries", type=int, default=0,
                            help="set a number of retries if a request fails to connect (default: 0)")

    def _add_update_args(self, parser):
        # UPDATES Section
        parser.add_argument("updates", "-up", "--update", action="store_true",
                            help="update subprober to the latest version (uv tool required to be installed)")
        parser.add_argument("updates", "-sup", "--show-updates", action="store_true",
                            help="display the current or latest version of subprober updates")

    def _add_debug_args(self, parser):
        # DEBUG Section
        parser.add_argument("debug", "-silent", "--silent", action="store_true",
                            help="enable silent mode to suppress the display of Subprober banner and version information.")
        parser.add_argument("debug", "-verbose", "--verbose", action="store_true",
                            help="enable verbose mode to display error results on the console.")
        parser.add_argument("debug", "-nc", "--no-color", action="store_true",
                            help="enable to display the output without any CLI colors")
        parser.add_argument("debug", "-debug", "--debug", action="store_true", help="debug the subprober workflow")

    def cli(self):
        """Build the full argument parser and return the parsed arguments.

        Exits with status 1 on Ctrl-C or on any unexpected parser failure,
        matching the original behavior.
        """
        try:
            parser = RichParser(
                description="Subprober - An essential HTTP multi-purpose Probing Tool for Penetration Testers and Security Researchers with Asynchronous httpx client support ⚡",
                auto=False
            )
            self._add_input_args(parser)
            self._add_probe_args(parser)
            self._add_config_args(parser)
            self._add_misc_args(parser)
            self._add_headless_args(parser)
            self._add_matcher_args(parser)
            self._add_filter_args(parser)
            self._add_output_args(parser)
            self._add_rate_limit_args(parser)
            self._add_optimization_args(parser)
            self._add_update_args(parser)
            self._add_debug_args(parser)

            self.parser = parser
            return parser.parse_args()
        except KeyboardInterrupt:
            exit(1)
        except Exception as e:
            print(f"Exception occurred in the cli module due to: {e}, {type(e)}")
            exit(1)
File without changes
@@ -0,0 +1,321 @@
1
+ import aiodns
2
+ import asyncio
3
+ import socket
4
+ from typing import Optional, List
5
+ from contextlib import asynccontextmanager
6
+
7
+
8
class AsyncDnsClient:
    """
    Asynchronous DNS client for domain resolution.

    This client uses the aiodns library to perform DNS queries with retry
    logic. Concurrency control is handled by the caller.
    """

    def __init__(
        self,
        nameservers: Optional[List[str]] = None,
        max_retries: int = 3,
        timeout: float = 5.0,
        verbose: bool = False
    ):
        """
        Initialize DNS client.

        Args:
            nameservers: List of DNS servers to use (default: Cloudflare and Google)
            max_retries: Maximum retry attempts per query
            timeout: Query timeout in seconds
            verbose: Enable verbose logging
        """
        self.nameservers = nameservers or ["1.1.1.1", "8.8.8.8"]
        self.max_retries = max_retries
        self.timeout = timeout
        self.verbose = verbose
        self._resolver = None
        self._resolver_lock = asyncio.Lock()

    def _log(self, message: str, level: str = "INFO"):
        """Log message if verbose is enabled."""
        if self.verbose:
            print(f"[DNS-{level}] {message}")

    async def _get_resolver(self) -> "aiodns.DNSResolver":
        """
        Get or create the shared DNS resolver instance.

        A single resolver is reused for the client's lifetime to avoid
        exhausting system inotify watches. The double-check under the async
        lock keeps creation single-shot even when coroutines race here.
        """
        if self._resolver is None:
            async with self._resolver_lock:
                # Double-check after acquiring lock
                if self._resolver is None:
                    try:
                        loop = asyncio.get_running_loop()
                        self._resolver = aiodns.DNSResolver(
                            loop=loop,
                            nameservers=self.nameservers,
                            rotate=True,
                            timeout=self.timeout
                        )
                        self._log(f"DNS resolver created with nameservers: {self.nameservers}")
                    except Exception as e:
                        self._log(f"Failed to create DNS resolver: {e}", "ERROR")
                        raise
        return self._resolver

    def _extract_domain(self, url: str) -> Optional[str]:
        """
        Extract a bare, lower-cased host name from a URL.

        Handles scheme, userinfo (user:pass@host), path/query/fragment,
        ports, bracketed IPv6 literals and trailing dots.

        Args:
            url: URL or domain string

        Returns:
            Cleaned domain name or None if invalid
        """
        if not url:
            return None

        # Remove protocol
        if "://" in url:
            url = url.split("://", 1)[1]

        # Remove path, query, fragment
        url = url.split("/", 1)[0].split("?", 1)[0].split("#", 1)[0]

        # Remove userinfo before the port split, otherwise "user:pass@host"
        # would be truncated at the first colon.
        url = url.rsplit("@", 1)[-1]

        # Remove port; keep bracketed IPv6 literals intact ([::1]:8080 -> ::1).
        if url.startswith("["):
            url = url[1:].split("]", 1)[0]
        else:
            url = url.split(":", 1)[0]

        # Remove trailing dot
        url = url.rstrip(".")

        return url.lower() if url else None

    @staticmethod
    def _parse_records(results, record_type: str) -> List[str]:
        """
        Normalize an aiodns query result into a flat list of strings.

        A/AAAA/MX/NS/PTR results all expose the value via the `.host`
        attribute, so they share one branch.
        """
        if record_type == 'CNAME':
            return [results.cname] if hasattr(results, 'cname') else []
        if record_type in ('A', 'AAAA', 'MX', 'NS', 'PTR'):
            return [result.host for result in results] if results else []
        if record_type == 'TXT':
            return [result.text for result in results] if results else []
        if record_type == 'SOA':
            return [f"{results.nsname} {results.hostmaster}"] if results else []
        # Unknown/unsupported record type
        return []

    async def _query_with_retry(
        self,
        domain: str,
        record_type: str,
        resolver: "aiodns.DNSResolver"
    ) -> List[str]:
        """
        Execute DNS query with retry logic and exponential backoff.

        NXDOMAIN and NODATA answers are definitive and are not retried;
        timeouts, transient DNS errors and socket errors are.

        Args:
            domain: Domain to query
            record_type: DNS record type (A, AAAA, CNAME, etc.)
            resolver: DNS resolver instance

        Returns:
            List of resolved records (empty on failure)
        """
        last_exception = None

        for attempt in range(1, self.max_retries + 1):
            try:
                self._log(f"Querying {domain} ({record_type}) - Attempt {attempt}/{self.max_retries}")

                results = await asyncio.wait_for(
                    resolver.query(domain, record_type),
                    timeout=self.timeout
                )

                records = self._parse_records(results, record_type)

                if records:
                    self._log(f"Successfully resolved {domain} ({record_type}): {records}")
                    return records
                else:
                    self._log(f"No records found for {domain} ({record_type})")
                    return []

            except asyncio.TimeoutError:
                last_exception = f"Timeout after {self.timeout}s"
                self._log(f"Timeout for {domain} ({record_type}) on attempt {attempt}", "WARN")

            except aiodns.error.DNSError as e:
                # DNS-specific errors (NXDOMAIN, SERVFAIL, etc.)
                if e.args[0] == aiodns.error.ARES_ENOTFOUND:
                    self._log(f"Domain not found: {domain}", "DEBUG")
                    return []  # No retry for NXDOMAIN
                elif e.args[0] == aiodns.error.ARES_ENODATA:
                    self._log(f"No data for {domain} ({record_type})", "DEBUG")
                    return []  # No retry for NODATA
                last_exception = f"DNS error: {e}"
                self._log(f"DNS error for {domain} ({record_type}): {e}", "WARN")

            except socket.gaierror as e:
                last_exception = f"Socket error: {e}"
                self._log(f"Socket error for {domain} ({record_type}): {e}", "WARN")

            except (KeyboardInterrupt, asyncio.CancelledError):
                self._log("Query cancelled by user", "INFO")
                raise

            except Exception as e:
                last_exception = f"Unexpected error: {e}"
                self._log(f"Unexpected error for {domain} ({record_type}): {e}", "ERROR")

            # Exponential backoff between retries
            if attempt < self.max_retries:
                backoff = min(2 ** (attempt - 1), 5)  # Max 5 seconds
                self._log(f"Retrying in {backoff}s...", "DEBUG")
                await asyncio.sleep(backoff)

        # All retries exhausted
        self._log(f"All retries exhausted for {domain} ({record_type}). Last error: {last_exception}", "ERROR")
        return []

    async def resolve(
        self,
        url: str,
        record_type: str = 'A'
    ) -> List[str]:
        """
        Resolve domain with automatic retry and resource management.

        Args:
            url: URL or domain to resolve
            record_type: DNS record type (A, AAAA, CNAME, MX, TXT, NS, SOA, PTR)

        Returns:
            List of resolved records
        """
        domain = self._extract_domain(url)

        if not domain:
            self._log(f"Invalid domain: {url}", "ERROR")
            return []

        try:
            # Get shared resolver instance
            resolver = await self._get_resolver()
            return await self._query_with_retry(domain, record_type, resolver)
        except (KeyboardInterrupt, asyncio.CancelledError):
            return []
        except Exception as e:
            self._log(f"Fatal error resolving {domain}: {e}", "ERROR")
            return []

    async def resolve_many(
        self,
        domains: List[str],
        record_type: str = 'A'
    ) -> dict[str, List[str]]:
        """
        Resolve multiple domains concurrently.

        Note: Concurrency control should be handled by the caller if needed.
        This method will execute all queries concurrently without limits.
        Duplicate entries in `domains` collapse to one key in the result.

        Args:
            domains: List of URLs/domains to resolve
            record_type: DNS record type

        Returns:
            Dictionary mapping domain to resolved records
        """
        tasks = [self.resolve(domain, record_type) for domain in domains]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Exceptions surfaced by gather() become empty record lists.
        return {
            domain: result if isinstance(result, list) else []
            for domain, result in zip(domains, results)
        }

    async def close(self):
        """
        Clean up DNS resolver resources.

        Cancels any in-flight c-ares queries before dropping the shared
        resolver so pending callbacks cannot fire after shutdown. Should be
        called when done with the client.
        """
        self._log("Closing DNS client", "INFO")
        resolver, self._resolver = self._resolver, None
        if resolver is not None:
            try:
                # aiodns.DNSResolver.cancel(): abort all outstanding queries.
                resolver.cancel()
            except Exception as e:
                self._log(f"Error while cancelling resolver: {e}", "WARN")

    async def resolve_with_fallback(
        self,
        url: str,
        record_types: Optional[List[str]] = None
    ) -> dict[str, List[str]]:
        """
        Resolve domain with multiple record types as fallback.

        Args:
            url: URL or domain to resolve
            record_types: List of record types to try (default: A, AAAA)

        Returns:
            Dictionary mapping record type to resolved records
        """
        if record_types is None:
            record_types = ['A', 'AAAA']

        results = {}
        for record_type in record_types:
            records = await self.resolve(url, record_type)
            if records:
                results[record_type] = records

        return results
282
+
283
+
284
async def main():
    """Demo driver exercising the AsyncDnsClient API end to end."""
    client = AsyncDnsClient(
        nameservers=["1.1.1.1", "8.8.8.8"],
        max_retries=3,
        timeout=5.0,
        verbose=True
    )

    # One host, one record type.
    print("=== Single Resolution ===")
    a_records = await client.resolve("example.com", "A")
    print(f"A records for example.com: {a_records}\n")

    # Several hosts resolved concurrently.
    print("=== Multiple Resolutions ===")
    hosts = ["google.com", "github.com", "cloudflare.com"]
    resolved = await client.resolve_many(hosts, "A")
    for host, addresses in resolved.items():
        print(f"{host}: {addresses}")

    # A then AAAA for a single host.
    print("\n=== Resolution with Fallback ===")
    by_type = await client.resolve_with_fallback("google.com", ["A", "AAAA"])
    for rtype, values in by_type.items():
        print(f"{rtype}: {values}")

    # Non-address record types.
    print("\n=== Different Record Types ===")
    mx = await client.resolve("gmail.com", "MX")
    print(f"MX records for gmail.com: {mx}")

    ns = await client.resolve("google.com", "NS")
    print(f"NS records for google.com: {ns}")


if __name__ == "__main__":
    asyncio.run(main())
File without changes
subprober/hash/hash.py ADDED
@@ -0,0 +1,52 @@
1
+ import hashlib
2
+ import mmh3
3
+ from simhash import Simhash
4
+ from revoltlogger import Logger
5
+
6
def tokenize_for_simhash(text: str) -> list[str]:
    """Split *text* into lower-cased, whitespace-delimited tokens for Simhash."""
    lowered = text.lower()
    return lowered.split()
8
+
9
class HashGen:
    """Generates one or more digests of an HTTP response body.

    Supported algorithms: md5, sha1, sha256, sha512 (via hashlib), mmh3 and
    simhash. Unknown algorithm names are skipped (with a warning in verbose
    mode).
    """

    # Algorithms served directly by hashlib.new().
    _HASHLIB_ALGS = ("md5", "sha1", "sha256", "sha512")

    def __init__(self, algorithms: list[str], verbose: bool = False):
        # Normalize requested algorithm names once, up front.
        self.algorithms = [alg.strip().lower() for alg in algorithms]
        self.verbose = verbose
        self.logger = Logger()

    async def gen(self, response: str) -> dict[str, str]:
        """Return a mapping of algorithm name -> hex/decimal digest string.

        Returns an empty dict if the response cannot be normalized to UTF-8.
        Failures on individual algorithms are logged (verbose mode) and
        skipped, so one bad algorithm never loses the others' results.
        """
        hashed_results: dict[str, str] = {}

        try:
            # Round-trip through UTF-8 to drop any undecodable sequences.
            processed_response = response.encode("utf-8", errors="ignore").decode("utf-8", errors="ignore")
        except Exception as e:
            if self.verbose:
                self.logger.warn(f"Error processing response encoding for hashing: {e}, {type(e)}")
            return {}

        # Encode once instead of re-encoding for every requested algorithm.
        data = processed_response.encode("utf-8")

        for alg in self.algorithms:
            try:
                if alg in self._HASHLIB_ALGS:
                    # hashlib.new() dispatches by name, replacing four
                    # identical copy-pasted branches.
                    hashed_results[alg] = hashlib.new(alg, data).hexdigest()
                elif alg == "mmh3":
                    hashed_results[alg] = str(mmh3.hash(data))
                elif alg == "simhash":
                    tokens = tokenize_for_simhash(processed_response)
                    if tokens:
                        hashed_results[alg] = str(Simhash(tokens).value)
                    else:
                        # Simhash cannot be computed over zero tokens.
                        hashed_results[alg] = ""
                        if self.verbose:
                            self.logger.warn("Simhash received empty tokens for response. Setting empty hash.")
                else:
                    if self.verbose:
                        self.logger.warn(f"Undefined or unsupported hash algorithm requested: '{alg}'")
            except Exception as inner_e:
                # Gated on verbose for consistency with every other warning.
                if self.verbose:
                    self.logger.warn(f"Error generating hash for algorithm '{alg}': {inner_e}, {type(inner_e)}")

        return hashed_results
File without changes