souleyez 2.43.29__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9564 -2881
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +564 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +409 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +417 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +913 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +219 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +237 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +23034 -10679
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
  353. souleyez-3.0.0.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
@@ -11,11 +11,12 @@ Detects:
11
11
 
12
12
  This runs BEFORE web vulnerability scanners to enable smarter tool configuration.
13
13
  """
14
+
14
15
  import json
15
- import time
16
- import ssl
17
16
  import socket
18
- from typing import Dict, Any, List, Optional
17
+ import ssl
18
+ import time
19
+ from typing import Any, Dict, List, Optional
19
20
  from urllib.parse import urlparse
20
21
 
21
22
  from .plugin_base import PluginBase
@@ -45,238 +46,370 @@ HELP = {
45
46
  "presets": [
46
47
  {"name": "Quick Fingerprint", "args": [], "desc": "Fast fingerprint scan"},
47
48
  ],
49
+ "help_sections": [
50
+ {
51
+ "title": "What is HTTP Fingerprinting?",
52
+ "color": "cyan",
53
+ "content": [
54
+ (
55
+ "Overview",
56
+ [
57
+ "Lightweight reconnaissance that identifies web infrastructure",
58
+ "Runs automatically before vulnerability scanners",
59
+ "Enables smarter tool configuration based on detected technology",
60
+ ],
61
+ ),
62
+ (
63
+ "What It Detects",
64
+ [
65
+ "Server software - Apache, nginx, IIS, LiteSpeed",
66
+ "WAFs - Cloudflare, Akamai, AWS WAF, Imperva, Sucuri",
67
+ "CDNs - Cloudflare, Fastly, CloudFront, Akamai",
68
+ "Managed hosting - Squarespace, Wix, Shopify, Netlify",
69
+ ],
70
+ ),
71
+ ],
72
+ },
73
+ {
74
+ "title": "Usage & Examples",
75
+ "color": "green",
76
+ "content": [
77
+ (
78
+ "Basic Usage",
79
+ [
80
+ "souleyez jobs enqueue http_fingerprint http://example.com",
81
+ "souleyez jobs enqueue http_fingerprint https://example.com",
82
+ " → Detects server, WAF, CDN, and hosting platform",
83
+ ],
84
+ ),
85
+ ],
86
+ },
87
+ {
88
+ "title": "Why This Matters",
89
+ "color": "yellow",
90
+ "content": [
91
+ (
92
+ "Smart Tool Configuration",
93
+ [
94
+ "If Squarespace detected → skip CGI enumeration (pointless)",
95
+ "If Cloudflare WAF detected → adjust scan rate to avoid blocks",
96
+ "If nginx detected → test nginx-specific vulnerabilities",
97
+ ],
98
+ ),
99
+ (
100
+ "Attack Surface Mapping",
101
+ [
102
+ "Managed platforms have limited attack surface",
103
+ "WAFs require evasion techniques or finding bypasses",
104
+ "CDNs may hide the real origin server IP",
105
+ ],
106
+ ),
107
+ ],
108
+ },
109
+ ],
48
110
  }
49
111
 
50
112
  # WAF detection signatures
51
113
  # Format: {header_name: {value_pattern: waf_name}}
52
114
  WAF_SIGNATURES = {
53
115
  # Header-based detection
54
- 'headers': {
55
- 'server': {
56
- 'cloudflare': 'Cloudflare',
57
- 'akamaighost': 'Akamai',
58
- 'akamainetworkstorage': 'Akamai',
59
- 'awselb': 'AWS ELB',
60
- 'bigip': 'F5 BIG-IP',
61
- 'barracuda': 'Barracuda',
62
- 'denyall': 'DenyAll',
63
- 'fortigate': 'Fortinet FortiGate',
64
- 'imperva': 'Imperva',
65
- 'incapsula': 'Imperva Incapsula',
66
- 'netscaler': 'Citrix NetScaler',
67
- 'sucuri': 'Sucuri',
68
- 'wallarm': 'Wallarm',
116
+ "headers": {
117
+ "server": {
118
+ "cloudflare": "Cloudflare",
119
+ "akamaighost": "Akamai",
120
+ "akamainetworkstorage": "Akamai",
121
+ "awselb": "AWS ELB",
122
+ "bigip": "F5 BIG-IP",
123
+ "barracuda": "Barracuda",
124
+ "denyall": "DenyAll",
125
+ "fortigate": "Fortinet FortiGate",
126
+ "imperva": "Imperva",
127
+ "incapsula": "Imperva Incapsula",
128
+ "netscaler": "Citrix NetScaler",
129
+ "sucuri": "Sucuri",
130
+ "wallarm": "Wallarm",
69
131
  },
70
- 'x-powered-by': {
71
- 'aws lambda': 'AWS Lambda',
72
- 'express': 'Express.js',
73
- 'php': 'PHP',
74
- 'asp.net': 'ASP.NET',
132
+ "x-powered-by": {
133
+ "aws lambda": "AWS Lambda",
134
+ "express": "Express.js",
135
+ "php": "PHP",
136
+ "asp.net": "ASP.NET",
75
137
  },
76
- 'x-sucuri-id': {'': 'Sucuri'},
77
- 'x-sucuri-cache': {'': 'Sucuri'},
78
- 'cf-ray': {'': 'Cloudflare'},
79
- 'cf-cache-status': {'': 'Cloudflare'},
80
- 'x-amz-cf-id': {'': 'AWS CloudFront'},
81
- 'x-amz-cf-pop': {'': 'AWS CloudFront'},
82
- 'x-akamai-transformed': {'': 'Akamai'},
83
- 'x-cache': {
84
- 'cloudfront': 'AWS CloudFront',
85
- 'varnish': 'Varnish',
138
+ "x-sucuri-id": {"": "Sucuri"},
139
+ "x-sucuri-cache": {"": "Sucuri"},
140
+ "cf-ray": {"": "Cloudflare"},
141
+ "cf-cache-status": {"": "Cloudflare"},
142
+ "x-amz-cf-id": {"": "AWS CloudFront"},
143
+ "x-amz-cf-pop": {"": "AWS CloudFront"},
144
+ "x-akamai-transformed": {"": "Akamai"},
145
+ "x-cache": {
146
+ "cloudfront": "AWS CloudFront",
147
+ "varnish": "Varnish",
86
148
  },
87
- 'x-fastly-request-id': {'': 'Fastly'},
88
- 'x-served-by': {
89
- 'cache-': 'Fastly',
149
+ "x-fastly-request-id": {"": "Fastly"},
150
+ "x-served-by": {
151
+ "cache-": "Fastly",
90
152
  },
91
- 'x-cdn': {
92
- 'incapsula': 'Imperva Incapsula',
93
- 'cloudflare': 'Cloudflare',
153
+ "x-cdn": {
154
+ "incapsula": "Imperva Incapsula",
155
+ "cloudflare": "Cloudflare",
94
156
  },
95
- 'x-iinfo': {'': 'Imperva Incapsula'},
96
- 'x-proxy-id': {'': 'Imperva'},
97
- 'x-request-id': {}, # Generic, but useful context
98
- 'x-fw-protection': {'': 'Unknown WAF'},
99
- 'x-protected-by': {'': 'Unknown WAF'},
100
- 'x-waf-status': {'': 'Unknown WAF'},
101
- 'x-denied-reason': {'': 'Unknown WAF'},
157
+ "x-iinfo": {"": "Imperva Incapsula"},
158
+ "x-proxy-id": {"": "Imperva"},
159
+ "x-request-id": {}, # Generic, but useful context
160
+ "x-fw-protection": {"": "Unknown WAF"},
161
+ "x-protected-by": {"": "Unknown WAF"},
162
+ "x-waf-status": {"": "Unknown WAF"},
163
+ "x-denied-reason": {"": "Unknown WAF"},
102
164
  },
103
165
  # Cookie-based detection
104
- 'cookies': {
105
- '__cfduid': 'Cloudflare',
106
- 'cf_clearance': 'Cloudflare',
107
- '__cf_bm': 'Cloudflare Bot Management',
108
- 'incap_ses': 'Imperva Incapsula',
109
- 'visid_incap': 'Imperva Incapsula',
110
- 'nlbi_': 'Imperva Incapsula',
111
- 'ak_bmsc': 'Akamai Bot Manager',
112
- 'bm_sz': 'Akamai Bot Manager',
113
- '_abck': 'Akamai Bot Manager',
114
- 'awsalb': 'AWS ALB',
115
- 'awsalbcors': 'AWS ALB',
116
- 'ts': 'F5 BIG-IP',
117
- 'bigipserver': 'F5 BIG-IP',
118
- 'citrix_ns_id': 'Citrix NetScaler',
119
- 'sucuri_cloudproxy': 'Sucuri',
166
+ "cookies": {
167
+ "__cfduid": "Cloudflare",
168
+ "cf_clearance": "Cloudflare",
169
+ "__cf_bm": "Cloudflare Bot Management",
170
+ "incap_ses": "Imperva Incapsula",
171
+ "visid_incap": "Imperva Incapsula",
172
+ "nlbi_": "Imperva Incapsula",
173
+ "ak_bmsc": "Akamai Bot Manager",
174
+ "bm_sz": "Akamai Bot Manager",
175
+ "_abck": "Akamai Bot Manager",
176
+ "awsalb": "AWS ALB",
177
+ "awsalbcors": "AWS ALB",
178
+ "ts": "F5 BIG-IP",
179
+ "bigipserver": "F5 BIG-IP",
180
+ "citrix_ns_id": "Citrix NetScaler",
181
+ "sucuri_cloudproxy": "Sucuri",
120
182
  },
121
183
  }
122
184
 
123
185
  # CDN detection signatures
124
186
  CDN_SIGNATURES = {
125
- 'headers': {
126
- 'cf-ray': 'Cloudflare',
127
- 'cf-cache-status': 'Cloudflare',
128
- 'x-amz-cf-id': 'AWS CloudFront',
129
- 'x-amz-cf-pop': 'AWS CloudFront',
130
- 'x-cache': {
131
- 'cloudfront': 'AWS CloudFront',
132
- 'hit from cloudfront': 'AWS CloudFront',
187
+ "headers": {
188
+ "cf-ray": "Cloudflare",
189
+ "cf-cache-status": "Cloudflare",
190
+ "x-amz-cf-id": "AWS CloudFront",
191
+ "x-amz-cf-pop": "AWS CloudFront",
192
+ "x-cache": {
193
+ "cloudfront": "AWS CloudFront",
194
+ "hit from cloudfront": "AWS CloudFront",
133
195
  },
134
- 'x-fastly-request-id': 'Fastly',
135
- 'x-served-by': 'Fastly',
136
- 'x-akamai-transformed': 'Akamai',
137
- 'x-akamai-request-id': 'Akamai',
138
- 'x-edge-location': 'Generic CDN',
139
- 'x-cdn': 'Generic CDN',
140
- 'x-cache-status': 'Generic CDN',
141
- 'x-varnish': 'Varnish',
142
- 'via': {
143
- 'cloudfront': 'AWS CloudFront',
144
- 'varnish': 'Varnish',
145
- 'akamai': 'Akamai',
196
+ "x-fastly-request-id": "Fastly",
197
+ "x-served-by": "Fastly",
198
+ "x-akamai-transformed": "Akamai",
199
+ "x-akamai-request-id": "Akamai",
200
+ "x-edge-location": "Generic CDN",
201
+ "x-cdn": "Generic CDN",
202
+ "x-cache-status": "Generic CDN",
203
+ "x-varnish": "Varnish",
204
+ "via": {
205
+ "cloudfront": "AWS CloudFront",
206
+ "varnish": "Varnish",
207
+ "akamai": "Akamai",
146
208
  },
147
- 'x-azure-ref': 'Azure CDN',
148
- 'x-msedge-ref': 'Azure CDN',
149
- 'x-goog-': 'Google Cloud CDN',
150
- 'x-bunny-': 'Bunny CDN',
151
- 'x-hw': 'Huawei CDN',
209
+ "x-azure-ref": "Azure CDN",
210
+ "x-msedge-ref": "Azure CDN",
211
+ "x-goog-": "Google Cloud CDN",
212
+ "x-bunny-": "Bunny CDN",
213
+ "x-hw": "Huawei CDN",
152
214
  },
153
- 'server': {
154
- 'cloudflare': 'Cloudflare',
155
- 'akamaighost': 'Akamai',
156
- 'cloudfront': 'AWS CloudFront',
157
- 'fastly': 'Fastly',
158
- 'varnish': 'Varnish',
159
- 'keycdn': 'KeyCDN',
160
- 'bunnycdn': 'Bunny CDN',
161
- 'cdn77': 'CDN77',
162
- 'stackpath': 'StackPath',
163
- 'limelight': 'Limelight',
164
- 'azure': 'Azure CDN',
215
+ "server": {
216
+ "cloudflare": "Cloudflare",
217
+ "akamaighost": "Akamai",
218
+ "cloudfront": "AWS CloudFront",
219
+ "fastly": "Fastly",
220
+ "varnish": "Varnish",
221
+ "keycdn": "KeyCDN",
222
+ "bunnycdn": "Bunny CDN",
223
+ "cdn77": "CDN77",
224
+ "stackpath": "StackPath",
225
+ "limelight": "Limelight",
226
+ "azure": "Azure CDN",
165
227
  },
166
228
  }
167
229
 
168
230
  # Managed hosting platform signatures
169
231
  MANAGED_HOSTING_SIGNATURES = {
170
- 'server': {
171
- 'squarespace': 'Squarespace',
172
- 'wix': 'Wix',
173
- 'shopify': 'Shopify',
174
- 'weebly': 'Weebly',
175
- 'webflow': 'Webflow',
176
- 'ghost': 'Ghost',
177
- 'medium': 'Medium',
178
- 'tumblr': 'Tumblr',
179
- 'blogger': 'Blogger/Blogspot',
180
- 'wordpress.com': 'WordPress.com',
181
- 'netlify': 'Netlify',
182
- 'vercel': 'Vercel',
183
- 'heroku': 'Heroku',
184
- 'github': 'GitHub Pages',
185
- 'gitlab': 'GitLab Pages',
186
- 'firebase': 'Firebase Hosting',
187
- 'render': 'Render',
188
- 'railway': 'Railway',
189
- 'fly': 'Fly.io',
190
- 'deno': 'Deno Deploy',
232
+ "server": {
233
+ "squarespace": "Squarespace",
234
+ "wix": "Wix",
235
+ "shopify": "Shopify",
236
+ "weebly": "Weebly",
237
+ "webflow": "Webflow",
238
+ "ghost": "Ghost",
239
+ "medium": "Medium",
240
+ "tumblr": "Tumblr",
241
+ "blogger": "Blogger/Blogspot",
242
+ "wordpress.com": "WordPress.com",
243
+ "netlify": "Netlify",
244
+ "vercel": "Vercel",
245
+ "heroku": "Heroku",
246
+ "github": "GitHub Pages",
247
+ "gitlab": "GitLab Pages",
248
+ "firebase": "Firebase Hosting",
249
+ "render": "Render",
250
+ "railway": "Railway",
251
+ "fly": "Fly.io",
252
+ "deno": "Deno Deploy",
191
253
  },
192
- 'headers': {
193
- 'x-shopify-stage': 'Shopify',
194
- 'x-shopify-request-id': 'Shopify',
195
- 'x-wix-request-id': 'Wix',
196
- 'x-wix-renderer-server': 'Wix',
197
- 'x-sqsp-edge': 'Squarespace',
198
- 'x-squarespace-': 'Squarespace',
199
- 'x-ghost-': 'Ghost',
200
- 'x-medium-content': 'Medium',
201
- 'x-tumblr-': 'Tumblr',
202
- 'x-blogger-': 'Blogger/Blogspot',
203
- 'x-netlify-': 'Netlify',
204
- 'x-nf-request-id': 'Netlify',
205
- 'x-vercel-': 'Vercel',
206
- 'x-vercel-id': 'Vercel',
207
- 'x-heroku-': 'Heroku',
208
- 'x-github-request-id': 'GitHub Pages',
209
- 'x-firebase-': 'Firebase Hosting',
210
- 'x-render-origin-server': 'Render',
211
- 'fly-request-id': 'Fly.io',
254
+ "headers": {
255
+ "x-shopify-stage": "Shopify",
256
+ "x-shopify-request-id": "Shopify",
257
+ "x-wix-request-id": "Wix",
258
+ "x-wix-renderer-server": "Wix",
259
+ "x-sqsp-edge": "Squarespace",
260
+ "x-squarespace-": "Squarespace",
261
+ "x-ghost-": "Ghost",
262
+ "x-medium-content": "Medium",
263
+ "x-tumblr-": "Tumblr",
264
+ "x-blogger-": "Blogger/Blogspot",
265
+ "x-netlify-": "Netlify",
266
+ "x-nf-request-id": "Netlify",
267
+ "x-vercel-": "Vercel",
268
+ "x-vercel-id": "Vercel",
269
+ "x-heroku-": "Heroku",
270
+ "x-github-request-id": "GitHub Pages",
271
+ "x-firebase-": "Firebase Hosting",
272
+ "x-render-origin-server": "Render",
273
+ "fly-request-id": "Fly.io",
212
274
  },
213
- 'cookies': {
214
- 'wordpress_': 'WordPress',
215
- 'wp-settings': 'WordPress',
216
- '_shopify_': 'Shopify',
217
- 'wixSession': 'Wix',
275
+ "cookies": {
276
+ "wordpress_": "WordPress",
277
+ "wp-settings": "WordPress",
278
+ "_shopify_": "Shopify",
279
+ "wixSession": "Wix",
218
280
  },
219
281
  }
220
282
 
221
283
  # Server software signatures
222
284
  SERVER_SIGNATURES = {
223
- 'apache': 'Apache',
224
- 'nginx': 'nginx',
225
- 'microsoft-iis': 'Microsoft IIS',
226
- 'iis': 'Microsoft IIS',
227
- 'lighttpd': 'lighttpd',
228
- 'litespeed': 'LiteSpeed',
229
- 'openresty': 'OpenResty',
230
- 'caddy': 'Caddy',
231
- 'tomcat': 'Apache Tomcat',
232
- 'jetty': 'Eclipse Jetty',
233
- 'gunicorn': 'Gunicorn',
234
- 'uvicorn': 'Uvicorn',
235
- 'werkzeug': 'Werkzeug (Flask)',
236
- 'waitress': 'Waitress',
237
- 'cowboy': 'Cowboy (Erlang)',
238
- 'kestrel': 'Kestrel (ASP.NET)',
239
- 'express': 'Express.js',
285
+ "apache": "Apache",
286
+ "nginx": "nginx",
287
+ "microsoft-iis": "Microsoft IIS",
288
+ "iis": "Microsoft IIS",
289
+ "lighttpd": "lighttpd",
290
+ "litespeed": "LiteSpeed",
291
+ "openresty": "OpenResty",
292
+ "caddy": "Caddy",
293
+ "tomcat": "Apache Tomcat",
294
+ "jetty": "Eclipse Jetty",
295
+ "gunicorn": "Gunicorn",
296
+ "uvicorn": "Uvicorn",
297
+ "werkzeug": "Werkzeug (Flask)",
298
+ "waitress": "Waitress",
299
+ "cowboy": "Cowboy (Erlang)",
300
+ "kestrel": "Kestrel (ASP.NET)",
301
+ "express": "Express.js",
240
302
  }
241
303
 
242
304
 
243
305
  class HttpFingerprintPlugin(PluginBase):
244
306
  name = "HTTP Fingerprint"
245
307
  tool = "http_fingerprint"
246
- category = "recon"
308
+ category = "scanning"
247
309
  HELP = HELP
248
310
 
249
- def build_command(self, target: str, args: List[str] = None, label: str = "", log_path: str = None):
311
+ def build_command(
312
+ self, target: str, args: List[str] = None, label: str = "", log_path: str = None
313
+ ):
250
314
  """
251
315
  HTTP fingerprinting is done in Python, not via external command.
252
316
  Return None to use run() method instead.
253
317
  """
254
318
  return None
255
319
 
256
- def run(self, target: str, args: List[str] = None, label: str = "", log_path: str = None) -> int:
257
- """Execute HTTP fingerprint scan."""
320
+ def run(
321
+ self, target: str, args: List[str] = None, label: str = "", log_path: str = None
322
+ ) -> int:
323
+ """Execute HTTP fingerprint scan with smart protocol detection."""
258
324
  args = args or []
259
325
  timeout = 10
260
326
 
261
327
  # Parse timeout from args
262
328
  for i, arg in enumerate(args):
263
- if arg == '--timeout' and i + 1 < len(args):
329
+ if arg == "--timeout" and i + 1 < len(args):
264
330
  try:
265
331
  timeout = int(args[i + 1])
266
332
  except ValueError:
267
333
  pass
268
334
 
269
335
  # Ensure target has scheme
270
- if not target.startswith(('http://', 'https://')):
271
- target = f'http://{target}'
336
+ if not target.startswith(("http://", "https://")):
337
+ target = f"http://{target}"
272
338
 
273
339
  try:
274
- result = self._fingerprint(target, timeout)
275
- output = self._format_output(target, result, label)
340
+ # Use thread-based hard timeout to prevent indefinite hangs
341
+ # urllib timeouts don't always work if server accepts connection but stalls
342
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
343
+
344
+ hard_timeout = timeout * 3 # 30 seconds max for entire probe operation
345
+
346
+ with ThreadPoolExecutor(max_workers=1) as executor:
347
+ future = executor.submit(self._smart_probe, target, timeout)
348
+ try:
349
+ result, effective_url = future.result(timeout=hard_timeout)
350
+ except FuturesTimeout:
351
+ # Hard timeout hit - server is unresponsive
352
+ result = {
353
+ "error": f"Timeout: server did not respond within {hard_timeout}s",
354
+ "status_code": None,
355
+ "server": None,
356
+ "waf": [],
357
+ "cdn": [],
358
+ "managed_hosting": None,
359
+ "technologies": [],
360
+ "headers": {},
361
+ "cookies": [],
362
+ "tls": None,
363
+ "redirect_url": None,
364
+ }
365
+ effective_url = target
366
+
367
+ output = self._format_output(effective_url, result, label)
276
368
 
277
369
  if log_path:
278
- with open(log_path, 'a', encoding='utf-8', errors='replace') as fh:
370
+ with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
279
371
  fh.write(output)
372
+
373
+ # Skip additional probing if initial fingerprint failed
374
+ if not result.get("error"):
375
+ # Fetch robots.txt and sitemap.xml for path discovery
376
+ robots_paths, sitemap_paths = self._fetch_robots_sitemap(
377
+ effective_url, timeout
378
+ )
379
+ result["robots_paths"] = robots_paths
380
+ result["sitemap_paths"] = sitemap_paths
381
+
382
+ # Quick path probing for CMS, admin panels, API endpoints
383
+ quick_probe = self._quick_path_probe(effective_url, timeout)
384
+ result["cms_detected"] = quick_probe.get("cms")
385
+ result["admin_panels"] = quick_probe.get("admin_panels", [])
386
+ result["api_endpoints"] = quick_probe.get("api_endpoints", [])
387
+
388
+ # Write additional detections to log
389
+ if quick_probe.get("cms"):
390
+ cms = quick_probe["cms"]
391
+ fh.write(f"\n{'=' * 40}\n")
392
+ fh.write(
393
+ f"CMS DETECTED: {cms['name']} ({cms['confidence']} confidence)\n"
394
+ )
395
+ for p in cms["paths"]:
396
+ fh.write(f" - {p['path']} (HTTP {p['status']})\n")
397
+ fh.write(f"{'=' * 40}\n")
398
+
399
+ if quick_probe.get("admin_panels"):
400
+ fh.write(f"\nADMIN PANELS FOUND:\n")
401
+ for panel in quick_probe["admin_panels"]:
402
+ fh.write(
403
+ f" - {panel['name']}: {panel['url']} (HTTP {panel['status']})\n"
404
+ )
405
+
406
+ if quick_probe.get("api_endpoints"):
407
+ fh.write(f"\nAPI ENDPOINTS FOUND:\n")
408
+ for api in quick_probe["api_endpoints"]:
409
+ fh.write(
410
+ f" - {api['type']}: {api['url']} (HTTP {api['status']})\n"
411
+ )
412
+
280
413
  # Write JSON result for parsing
281
414
  fh.write("\n\n=== JSON_RESULT ===\n")
282
415
  fh.write(json.dumps(result, indent=2))
@@ -290,11 +423,181 @@ class HttpFingerprintPlugin(PluginBase):
290
423
  error_output += f"Error: {type(e).__name__}: {e}\n"
291
424
 
292
425
  if log_path:
293
- with open(log_path, 'a', encoding='utf-8', errors='replace') as fh:
426
+ with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
294
427
  fh.write(error_output)
295
428
 
296
429
  return 1
297
430
 
431
+ def _smart_probe(self, target: str, timeout: int = 10) -> tuple:
432
+ """
433
+ Smart protocol detection: probe both HTTP and HTTPS, return the better result.
434
+
435
+ This handles cases where:
436
+ - nmap reports HTTP but server is actually HTTPS
437
+ - Server serves different content on HTTP vs HTTPS
438
+ - HTTP redirects to HTTPS (or vice versa)
439
+
440
+ Returns:
441
+ tuple: (result_dict, effective_url)
442
+ """
443
+ parsed = urlparse(target)
444
+
445
+ # Quick connectivity check - fail fast if port isn't responding
446
+ host = parsed.hostname
447
+ port = parsed.port or (443 if parsed.scheme == "https" else 80)
448
+ try:
449
+ with socket.create_connection((host, port), timeout=min(timeout, 5)) as sock:
450
+ pass # Just checking if we can connect
451
+ except (socket.timeout, socket.error, OSError) as e:
452
+ # Port not responding - return error result immediately
453
+ return {
454
+ "error": f"Connection failed: {e}",
455
+ "status_code": None,
456
+ "server": None,
457
+ "waf": [],
458
+ "cdn": [],
459
+ "managed_hosting": None,
460
+ "technologies": [],
461
+ "headers": {},
462
+ "cookies": [],
463
+ "tls": None,
464
+ "redirect_url": None,
465
+ "protocol_detection": "failed",
466
+ "effective_url": target,
467
+ }, target
468
+
469
+ # Build both URL variants
470
+ http_url = (
471
+ f"http://{host}:{port}"
472
+ if port not in (80, 443)
473
+ else f"http://{host}" if port == 80 else f"http://{host}:{port}"
474
+ )
475
+ https_url = (
476
+ f"https://{host}:{port}"
477
+ if port not in (80, 443)
478
+ else f"https://{host}" if port == 443 else f"https://{host}:{port}"
479
+ )
480
+
481
+ # Handle standard ports correctly
482
+ if port == 80:
483
+ http_url = f"http://{host}"
484
+ https_url = f"https://{host}:80" # Non-standard HTTPS on port 80
485
+ elif port == 443:
486
+ http_url = f"http://{host}:443" # Non-standard HTTP on port 443
487
+ https_url = f"https://{host}"
488
+ else:
489
+ http_url = f"http://{host}:{port}"
490
+ https_url = f"https://{host}:{port}"
491
+
492
+ # Probe the original protocol first
493
+ original_is_https = parsed.scheme == "https"
494
+ primary_url = target
495
+ alternate_url = https_url if not original_is_https else http_url
496
+
497
+ # Probe primary (original) URL
498
+ primary_result = self._fingerprint(primary_url, timeout)
499
+
500
+ # Calculate "richness" score for primary result
501
+ primary_score = self._calculate_result_richness(primary_result)
502
+ primary_status = primary_result.get("status_code") or 0
503
+
504
+ # Check if primary result is "good enough" to skip alternate probe
505
+ # Must have: successful status (2xx/3xx), decent score, no errors
506
+ # 4xx/5xx status means we MUST try alternate protocol (could be wrong protocol)
507
+ primary_is_successful = 200 <= primary_status < 400
508
+
509
+ if (
510
+ primary_is_successful
511
+ and primary_score >= 3
512
+ and not primary_result.get("error")
513
+ ):
514
+ primary_result["protocol_detection"] = "primary"
515
+ primary_result["effective_url"] = primary_url
516
+ return primary_result, primary_url
517
+
518
+ # Otherwise, probe alternate protocol (primary failed, errored, or got 4xx/5xx)
519
+ alternate_result = self._fingerprint(alternate_url, timeout)
520
+ alternate_score = self._calculate_result_richness(alternate_result)
521
+
522
+ # Compare and choose the better result
523
+ if alternate_score > primary_score and not alternate_result.get("error"):
524
+ # Alternate protocol is better
525
+ alternate_result["protocol_detection"] = "upgraded"
526
+ alternate_result["protocol_note"] = (
527
+ f"Switched from {parsed.scheme.upper()} to {'HTTPS' if not original_is_https else 'HTTP'} (richer response)"
528
+ )
529
+ alternate_result["original_url"] = primary_url
530
+ alternate_result["effective_url"] = alternate_url
531
+ return alternate_result, alternate_url
532
+ elif not primary_result.get("error"):
533
+ # Primary is fine or equal
534
+ primary_result["protocol_detection"] = "primary"
535
+ primary_result["effective_url"] = primary_url
536
+ return primary_result, primary_url
537
+ elif not alternate_result.get("error"):
538
+ # Primary failed, alternate works
539
+ alternate_result["protocol_detection"] = "fallback"
540
+ alternate_result["protocol_note"] = (
541
+ f"Primary ({parsed.scheme.upper()}) failed, using {'HTTPS' if not original_is_https else 'HTTP'}"
542
+ )
543
+ alternate_result["original_url"] = primary_url
544
+ alternate_result["effective_url"] = alternate_url
545
+ return alternate_result, alternate_url
546
+ else:
547
+ # Both failed, return primary with error
548
+ primary_result["protocol_detection"] = "failed"
549
+ primary_result["effective_url"] = primary_url
550
+ return primary_result, primary_url
551
+
552
+ def _calculate_result_richness(self, result: Dict[str, Any]) -> int:
553
+ """
554
+ Calculate a "richness" score for fingerprint results.
555
+ Higher score = more useful/valid response.
556
+ """
557
+ score = 0
558
+
559
+ # Error = bad
560
+ if result.get("error"):
561
+ return 0
562
+
563
+ # Status code scoring
564
+ status = result.get("status_code")
565
+ if status == 200:
566
+ score += 3
567
+ elif status in (301, 302, 303, 307, 308):
568
+ score += 2 # Redirects are informative
569
+ elif status in (401, 403):
570
+ score += 2 # Auth required = real service
571
+ elif status in (404, 500, 502, 503):
572
+ score += 1 # At least it responded
573
+
574
+ # Has server header
575
+ if result.get("server"):
576
+ score += 1
577
+
578
+ # Has technologies detected
579
+ if result.get("technologies"):
580
+ score += len(result["technologies"])
581
+
582
+ # Has TLS info (means HTTPS worked)
583
+ if result.get("tls"):
584
+ score += 2
585
+
586
+ # Has WAF/CDN detection
587
+ if result.get("waf"):
588
+ score += 1
589
+ if result.get("cdn"):
590
+ score += 1
591
+
592
+ # Has headers (more headers = richer response)
593
+ headers = result.get("headers", {})
594
+ if len(headers) > 5:
595
+ score += 2
596
+ elif len(headers) > 0:
597
+ score += 1
598
+
599
+ return score
600
+
298
601
  def _fingerprint(self, url: str, timeout: int = 10) -> Dict[str, Any]:
299
602
  """
300
603
  Perform HTTP fingerprinting on target URL.
@@ -308,46 +611,54 @@ class HttpFingerprintPlugin(PluginBase):
308
611
  - technologies: List of detected technologies
309
612
  - tls: TLS/SSL information (for HTTPS)
310
613
  """
311
- import urllib.request
312
614
  import urllib.error
615
+ import urllib.request
616
+
617
+ # Set global socket timeout to prevent hanging on slow/unresponsive servers
618
+ # This is a safety net - individual requests also have timeouts
619
+ old_timeout = socket.getdefaulttimeout()
620
+ socket.setdefaulttimeout(timeout + 5) # Slightly longer than request timeout
313
621
 
314
622
  result = {
315
- 'server': None,
316
- 'server_version': None,
317
- 'waf': [],
318
- 'cdn': [],
319
- 'managed_hosting': None,
320
- 'technologies': [],
321
- 'headers': {},
322
- 'cookies': [],
323
- 'tls': None,
324
- 'status_code': None,
325
- 'redirect_url': None,
623
+ "server": None,
624
+ "server_version": None,
625
+ "waf": [],
626
+ "cdn": [],
627
+ "managed_hosting": None,
628
+ "technologies": [],
629
+ "headers": {},
630
+ "cookies": [],
631
+ "tls": None,
632
+ "status_code": None,
633
+ "redirect_url": None,
326
634
  }
327
635
 
328
636
  parsed = urlparse(url)
329
637
 
330
638
  # Security: Only allow http/https schemes (B310 - prevent file:// or custom schemes)
331
- if parsed.scheme not in ('http', 'https'):
332
- result['error'] = f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed."
639
+ if parsed.scheme not in ("http", "https"):
640
+ result["error"] = (
641
+ f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed."
642
+ )
333
643
  return result
334
644
 
335
- is_https = parsed.scheme == 'https'
645
+ is_https = parsed.scheme == "https"
336
646
 
337
647
  # Check if target is an IP address (for special handling)
338
648
  import re
339
- is_ip_target = bool(re.match(r'^(\d{1,3}\.){3}\d{1,3}$', parsed.hostname or ''))
649
+
650
+ is_ip_target = bool(re.match(r"^(\d{1,3}\.){3}\d{1,3}$", parsed.hostname or ""))
340
651
 
341
652
  # Create request with common browser headers
342
653
  req = urllib.request.Request(
343
654
  url,
344
655
  headers={
345
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
346
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
347
- 'Accept-Language': 'en-US,en;q=0.5',
348
- 'Accept-Encoding': 'identity',
349
- 'Connection': 'close',
350
- }
656
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
657
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
658
+ "Accept-Language": "en-US,en;q=0.5",
659
+ "Accept-Encoding": "identity",
660
+ "Connection": "close",
661
+ },
351
662
  )
352
663
 
353
664
  # Always create SSL context with verification disabled
@@ -360,79 +671,93 @@ class HttpFingerprintPlugin(PluginBase):
360
671
  # Get TLS info for HTTPS targets
361
672
  if is_https:
362
673
  try:
363
- with socket.create_connection((parsed.hostname, parsed.port or 443), timeout=timeout) as sock:
364
- with ctx.wrap_socket(sock, server_hostname=parsed.hostname) as ssock:
674
+ with socket.create_connection(
675
+ (parsed.hostname, parsed.port or 443), timeout=timeout
676
+ ) as sock:
677
+ with ctx.wrap_socket(
678
+ sock, server_hostname=parsed.hostname
679
+ ) as ssock:
365
680
  cert = ssock.getpeercert(binary_form=True)
366
681
  cipher = ssock.cipher()
367
682
  version = ssock.version()
368
- result['tls'] = {
369
- 'version': version,
370
- 'cipher': cipher[0] if cipher else None,
371
- 'bits': cipher[2] if cipher else None,
683
+ result["tls"] = {
684
+ "version": version,
685
+ "cipher": cipher[0] if cipher else None,
686
+ "bits": cipher[2] if cipher else None,
372
687
  }
373
688
  except Exception:
374
689
  pass # TLS info is optional
375
690
 
376
691
  # Always pass SSL context (handles HTTP->HTTPS redirects)
377
- response = urllib.request.urlopen(req, timeout=timeout, context=ctx) # nosec B310 - scheme validated above
692
+ response = urllib.request.urlopen(
693
+ req, timeout=timeout, context=ctx
694
+ ) # nosec B310 - scheme validated above
378
695
 
379
- result['status_code'] = response.getcode()
696
+ result["status_code"] = response.getcode()
380
697
 
381
698
  # Get headers
382
699
  headers = {k.lower(): v for k, v in response.headers.items()}
383
- result['headers'] = dict(response.headers)
700
+ result["headers"] = dict(response.headers)
384
701
 
385
702
  # Check for redirect
386
703
  if response.geturl() != url:
387
- result['redirect_url'] = response.geturl()
704
+ result["redirect_url"] = response.geturl()
388
705
 
389
706
  # Parse cookies
390
- if 'set-cookie' in headers:
391
- cookies = headers.get('set-cookie', '')
392
- result['cookies'] = [c.strip() for c in cookies.split(',')]
707
+ if "set-cookie" in headers:
708
+ cookies = headers.get("set-cookie", "")
709
+ result["cookies"] = [c.strip() for c in cookies.split(",")]
393
710
 
394
711
  # Detect server
395
- server_header = headers.get('server', '').lower()
396
- result['server'] = headers.get('server')
712
+ server_header = headers.get("server", "").lower()
713
+ result["server"] = headers.get("server")
397
714
 
398
715
  for sig, name in SERVER_SIGNATURES.items():
399
716
  if sig in server_header:
400
- result['server_version'] = name
401
- result['technologies'].append(name)
717
+ result["server_version"] = name
718
+ result["technologies"].append(name)
402
719
  break
403
720
 
404
721
  # Detect WAF
405
- result['waf'] = self._detect_waf(headers, result['cookies'])
722
+ result["waf"] = self._detect_waf(headers, result["cookies"])
406
723
 
407
724
  # Detect CDN
408
- result['cdn'] = self._detect_cdn(headers, server_header)
725
+ result["cdn"] = self._detect_cdn(headers, server_header)
409
726
 
410
727
  # Detect managed hosting
411
- result['managed_hosting'] = self._detect_managed_hosting(headers, server_header, result['cookies'])
728
+ result["managed_hosting"] = self._detect_managed_hosting(
729
+ headers, server_header, result["cookies"]
730
+ )
412
731
 
413
732
  # Detect technologies from headers
414
733
  self._detect_technologies(headers, result)
415
734
 
416
735
  except urllib.error.HTTPError as e:
417
736
  # Even errors give us useful headers
418
- result['status_code'] = e.code
737
+ result["status_code"] = e.code
419
738
  headers = {k.lower(): v for k, v in e.headers.items()}
420
- result['headers'] = dict(e.headers)
421
- result['server'] = headers.get('server')
739
+ result["headers"] = dict(e.headers)
740
+ result["server"] = headers.get("server")
422
741
 
423
- server_header = headers.get('server', '').lower()
424
- result['waf'] = self._detect_waf(headers, [])
425
- result['cdn'] = self._detect_cdn(headers, server_header)
426
- result['managed_hosting'] = self._detect_managed_hosting(headers, server_header, [])
742
+ server_header = headers.get("server", "").lower()
743
+ result["waf"] = self._detect_waf(headers, [])
744
+ result["cdn"] = self._detect_cdn(headers, server_header)
745
+ result["managed_hosting"] = self._detect_managed_hosting(
746
+ headers, server_header, []
747
+ )
427
748
 
428
749
  except urllib.error.URLError as e:
429
- result['error'] = str(e.reason)
750
+ result["error"] = str(e.reason)
430
751
 
431
752
  except socket.timeout:
432
- result['error'] = 'Connection timed out'
753
+ result["error"] = "Connection timed out"
433
754
 
434
755
  except Exception as e:
435
- result['error'] = f'{type(e).__name__}: {e}'
756
+ result["error"] = f"{type(e).__name__}: {e}"
757
+
758
+ finally:
759
+ # Restore original socket timeout
760
+ socket.setdefaulttimeout(old_timeout)
436
761
 
437
762
  return result
438
763
 
@@ -441,20 +766,20 @@ class HttpFingerprintPlugin(PluginBase):
441
766
  detected = []
442
767
 
443
768
  # Check headers
444
- for header, signatures in WAF_SIGNATURES['headers'].items():
445
- header_val = headers.get(header, '').lower()
769
+ for header, signatures in WAF_SIGNATURES["headers"].items():
770
+ header_val = headers.get(header, "").lower()
446
771
  if header_val:
447
772
  if isinstance(signatures, dict):
448
773
  for sig, waf_name in signatures.items():
449
- if sig == '' or sig in header_val:
774
+ if sig == "" or sig in header_val:
450
775
  if waf_name and waf_name not in detected:
451
776
  detected.append(waf_name)
452
777
  elif isinstance(signatures, str) and signatures not in detected:
453
778
  detected.append(signatures)
454
779
 
455
780
  # Check cookies
456
- cookie_str = ' '.join(cookies).lower()
457
- for cookie_sig, waf_name in WAF_SIGNATURES['cookies'].items():
781
+ cookie_str = " ".join(cookies).lower()
782
+ for cookie_sig, waf_name in WAF_SIGNATURES["cookies"].items():
458
783
  if cookie_sig.lower() in cookie_str:
459
784
  if waf_name not in detected:
460
785
  detected.append(waf_name)
@@ -466,8 +791,8 @@ class HttpFingerprintPlugin(PluginBase):
466
791
  detected = []
467
792
 
468
793
  # Check specific headers
469
- for header, cdn_info in CDN_SIGNATURES['headers'].items():
470
- header_val = headers.get(header, '').lower()
794
+ for header, cdn_info in CDN_SIGNATURES["headers"].items():
795
+ header_val = headers.get(header, "").lower()
471
796
  if header_val:
472
797
  if isinstance(cdn_info, dict):
473
798
  for sig, cdn_name in cdn_info.items():
@@ -477,28 +802,30 @@ class HttpFingerprintPlugin(PluginBase):
477
802
  detected.append(cdn_info)
478
803
 
479
804
  # Check server header
480
- for sig, cdn_name in CDN_SIGNATURES['server'].items():
805
+ for sig, cdn_name in CDN_SIGNATURES["server"].items():
481
806
  if sig in server_header and cdn_name not in detected:
482
807
  detected.append(cdn_name)
483
808
 
484
809
  return detected
485
810
 
486
- def _detect_managed_hosting(self, headers: Dict[str, str], server_header: str, cookies: List[str]) -> Optional[str]:
811
+ def _detect_managed_hosting(
812
+ self, headers: Dict[str, str], server_header: str, cookies: List[str]
813
+ ) -> Optional[str]:
487
814
  """Detect managed hosting platform."""
488
815
  # Check server header first (most reliable)
489
- for sig, platform in MANAGED_HOSTING_SIGNATURES['server'].items():
816
+ for sig, platform in MANAGED_HOSTING_SIGNATURES["server"].items():
490
817
  if sig in server_header:
491
818
  return platform
492
819
 
493
820
  # Check specific headers
494
- for header_prefix, platform in MANAGED_HOSTING_SIGNATURES['headers'].items():
821
+ for header_prefix, platform in MANAGED_HOSTING_SIGNATURES["headers"].items():
495
822
  for header in headers:
496
823
  if header.lower().startswith(header_prefix.lower()):
497
824
  return platform
498
825
 
499
826
  # Check cookies
500
- cookie_str = ' '.join(cookies).lower()
501
- for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES['cookies'].items():
827
+ cookie_str = " ".join(cookies).lower()
828
+ for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES["cookies"].items():
502
829
  if cookie_sig.lower() in cookie_str:
503
830
  return platform
504
831
 
@@ -506,31 +833,309 @@ class HttpFingerprintPlugin(PluginBase):
506
833
 
507
834
  def _detect_technologies(self, headers: Dict[str, str], result: Dict[str, Any]):
508
835
  """Detect additional technologies from headers."""
509
- techs = result['technologies']
836
+ techs = result["technologies"]
510
837
 
511
838
  # X-Powered-By
512
- powered_by = headers.get('x-powered-by', '')
839
+ powered_by = headers.get("x-powered-by", "")
513
840
  if powered_by:
514
- if 'php' in powered_by.lower():
515
- techs.append(f'PHP ({powered_by})')
516
- elif 'asp.net' in powered_by.lower():
517
- techs.append(f'ASP.NET ({powered_by})')
518
- elif 'express' in powered_by.lower():
519
- techs.append('Express.js')
841
+ if "php" in powered_by.lower():
842
+ techs.append(f"PHP ({powered_by})")
843
+ elif "asp.net" in powered_by.lower():
844
+ techs.append(f"ASP.NET ({powered_by})")
845
+ elif "express" in powered_by.lower():
846
+ techs.append("Express.js")
520
847
  elif powered_by not in techs:
521
848
  techs.append(powered_by)
522
849
 
523
850
  # X-AspNet-Version
524
- aspnet_ver = headers.get('x-aspnet-version', '')
851
+ aspnet_ver = headers.get("x-aspnet-version", "")
525
852
  if aspnet_ver:
526
- techs.append(f'ASP.NET {aspnet_ver}')
853
+ techs.append(f"ASP.NET {aspnet_ver}")
527
854
 
528
855
  # X-Generator
529
- generator = headers.get('x-generator', '')
856
+ generator = headers.get("x-generator", "")
530
857
  if generator:
531
858
  techs.append(generator)
532
859
 
533
- result['technologies'] = list(set(techs))
860
+ result["technologies"] = list(set(techs))
861
+
862
+ def _fetch_robots_sitemap(self, base_url: str, timeout: int = 10) -> tuple:
863
+ """
864
+ Fetch robots.txt and sitemap.xml to extract paths for discovery.
865
+
866
+ This runs early in the recon chain so discovered paths can trigger
867
+ follow-up scans even if gobuster's wordlist doesn't include them.
868
+
869
+ Returns:
870
+ tuple: (robots_paths, sitemap_paths) - lists of discovered URLs
871
+ """
872
+ import re
873
+ import urllib.error
874
+ import urllib.request
875
+ from urllib.parse import urljoin
876
+
877
+ try:
878
+ import defusedxml.ElementTree as ElementTree
879
+ except ImportError:
880
+ import xml.etree.ElementTree as ElementTree
881
+
882
+ parsed = urlparse(base_url)
883
+ base = f"{parsed.scheme}://{parsed.netloc}"
884
+
885
+ robots_paths = []
886
+ sitemap_paths = []
887
+
888
+ # Create SSL context for self-signed certs
889
+ ctx = ssl.create_default_context()
890
+ ctx.check_hostname = False
891
+ ctx.verify_mode = ssl.CERT_NONE
892
+
893
+ # === Fetch robots.txt ===
894
+ try:
895
+ robots_url = urljoin(base + "/", "robots.txt")
896
+ req = urllib.request.Request(
897
+ robots_url,
898
+ headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
899
+ )
900
+ with urllib.request.urlopen(req, timeout=timeout, context=ctx) as response:
901
+ if response.getcode() == 200:
902
+ content = response.read().decode("utf-8", errors="replace")
903
+
904
+ # Known directives to skip
905
+ known_directives = [
906
+ "user-agent:",
907
+ "disallow:",
908
+ "allow:",
909
+ "sitemap:",
910
+ "crawl-delay:",
911
+ "host:",
912
+ "request-rate:",
913
+ ]
914
+
915
+ for line in content.split("\n"):
916
+ line = line.strip()
917
+ if not line or line.startswith("#"):
918
+ continue
919
+
920
+ line_lower = line.lower()
921
+
922
+ # Extract Disallow/Allow paths
923
+ if line_lower.startswith("disallow:") or line_lower.startswith(
924
+ "allow:"
925
+ ):
926
+ _, _, path = line.partition(":")
927
+ path = path.strip()
928
+ if (
929
+ path
930
+ and path != "/"
931
+ and "*" not in path
932
+ and "?" not in path
933
+ ):
934
+ full_url = urljoin(base + "/", path.lstrip("/"))
935
+ if full_url not in robots_paths:
936
+ robots_paths.append(full_url)
937
+
938
+ # Extract Sitemap URLs
939
+ elif line_lower.startswith("sitemap:"):
940
+ _, _, sitemap_url = line.partition(":")
941
+ sitemap_url = sitemap_url.strip()
942
+ # Handle "Sitemap: http://..." format
943
+ if sitemap_url.startswith("//"):
944
+ sitemap_url = parsed.scheme + ":" + sitemap_url
945
+ elif not sitemap_url.startswith("http"):
946
+ sitemap_url = urljoin(
947
+ base + "/", sitemap_url.lstrip("/")
948
+ )
949
+ if sitemap_url not in sitemap_paths:
950
+ sitemap_paths.append(sitemap_url)
951
+
952
+ # Extract bare file paths (CTF-style hints like "key-1-of-3.txt")
953
+ elif not any(
954
+ line_lower.startswith(d) for d in known_directives
955
+ ):
956
+ path = line.strip()
957
+ # Must look like a file with extension
958
+ if path and re.match(r"^[\w\-./]+\.\w{1,5}$", path):
959
+ full_url = urljoin(base + "/", path.lstrip("/"))
960
+ if full_url not in robots_paths:
961
+ robots_paths.append(full_url)
962
+
963
+ except Exception:
964
+ pass # robots.txt fetch is optional
965
+
966
+ # === Fetch sitemap.xml (if not found in robots.txt) ===
967
+ if not sitemap_paths:
968
+ sitemap_paths.append(urljoin(base + "/", "sitemap.xml"))
969
+
970
+ # Try to parse each sitemap
971
+ all_sitemap_urls = []
972
+ for sitemap_url in sitemap_paths[:3]: # Limit to first 3 sitemaps
973
+ try:
974
+ req = urllib.request.Request(
975
+ sitemap_url,
976
+ headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
977
+ )
978
+ with urllib.request.urlopen(
979
+ req, timeout=timeout, context=ctx
980
+ ) as response:
981
+ if response.getcode() == 200:
982
+ content = response.read().decode("utf-8", errors="replace")
983
+ try:
984
+ root = ElementTree.fromstring(content)
985
+ ns = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}
986
+
987
+ # Try with namespace
988
+ for loc in root.findall(".//sm:loc", ns):
989
+ if loc.text and loc.text not in all_sitemap_urls:
990
+ all_sitemap_urls.append(loc.text.strip())
991
+
992
+ # Try without namespace
993
+ if not all_sitemap_urls:
994
+ for loc in root.findall(".//loc"):
995
+ if loc.text and loc.text not in all_sitemap_urls:
996
+ all_sitemap_urls.append(loc.text.strip())
997
+
998
+ except ElementTree.ParseError:
999
+ # Fallback to regex
1000
+ loc_matches = re.findall(r"<loc>([^<]+)</loc>", content)
1001
+ for url in loc_matches:
1002
+ if url not in all_sitemap_urls:
1003
+ all_sitemap_urls.append(url)
1004
+
1005
+ except Exception:
1006
+ pass # sitemap fetch is optional
1007
+
1008
+ # Replace sitemap_paths with actual URLs from sitemaps (limit to 50)
1009
+ if all_sitemap_urls:
1010
+ sitemap_paths = all_sitemap_urls[:50]
1011
+ else:
1012
+ sitemap_paths = [] # Clear if sitemap didn't exist
1013
+
1014
+ return robots_paths, sitemap_paths
1015
+
1016
+ def _quick_path_probe(self, base_url: str, timeout: int = 10) -> Dict[str, Any]:
1017
+ """
1018
+ Quick path probing for CMS detection, admin panels, and API indicators.
1019
+
1020
+ Uses HEAD requests to minimize bandwidth and noise. Only checks paths
1021
+ that return 2xx/3xx/401/403 status codes (indicates existence).
1022
+
1023
+ Returns:
1024
+ dict: {
1025
+ 'cms': {'name': str, 'paths': list} or None,
1026
+ 'admin_panels': [{'path': str, 'status': int}],
1027
+ 'api_endpoints': [{'path': str, 'status': int, 'type': str}]
1028
+ }
1029
+ """
1030
+ import urllib.error
1031
+ import urllib.request
1032
+
1033
+ parsed = urlparse(base_url)
1034
+ base = f"{parsed.scheme}://{parsed.netloc}"
1035
+
1036
+ # Create SSL context for self-signed certs
1037
+ ctx = ssl.create_default_context()
1038
+ ctx.check_hostname = False
1039
+ ctx.verify_mode = ssl.CERT_NONE
1040
+
1041
+ result = {"cms": None, "admin_panels": [], "api_endpoints": []}
1042
+
1043
+ # Define paths to check
1044
+ # Format: (path, category, subcategory/type)
1045
+ paths_to_check = [
1046
+ # CMS Detection
1047
+ ("/wp-admin/", "cms", "WordPress"),
1048
+ ("/wp-login.php", "cms", "WordPress"),
1049
+ ("/wp-includes/", "cms", "WordPress"),
1050
+ ("/administrator/", "cms", "Joomla"),
1051
+ ("/components/com_content/", "cms", "Joomla"),
1052
+ ("/user/login", "cms", "Drupal"),
1053
+ ("/core/misc/drupal.js", "cms", "Drupal"),
1054
+ ("/typo3/", "cms", "TYPO3"),
1055
+ ("/sitecore/", "cms", "Sitecore"),
1056
+ # Admin Panels
1057
+ ("/phpmyadmin/", "admin", "phpMyAdmin"),
1058
+ ("/pma/", "admin", "phpMyAdmin"),
1059
+ ("/admin/", "admin", "Admin Panel"),
1060
+ ("/admin/login", "admin", "Admin Login"),
1061
+ ("/login/", "admin", "Login Page"),
1062
+ ("/login.php", "admin", "Login Page"),
1063
+ ("/manager/", "admin", "Manager"),
1064
+ ("/cpanel/", "admin", "cPanel"),
1065
+ ("/webmail/", "admin", "Webmail"),
1066
+ # API Indicators
1067
+ ("/api/", "api", "REST API"),
1068
+ ("/api/v1/", "api", "REST API v1"),
1069
+ ("/api/v2/", "api", "REST API v2"),
1070
+ ("/graphql", "api", "GraphQL"),
1071
+ ("/graphql/", "api", "GraphQL"),
1072
+ ("/swagger.json", "api", "Swagger/OpenAPI"),
1073
+ ("/swagger/", "api", "Swagger UI"),
1074
+ ("/openapi.json", "api", "OpenAPI"),
1075
+ ("/api-docs/", "api", "API Docs"),
1076
+ ("/v1/", "api", "API v1"),
1077
+ ("/rest/", "api", "REST API"),
1078
+ ]
1079
+
1080
+ # Track CMS detections to avoid duplicates
1081
+ cms_detected = {}
1082
+
1083
+ for path, category, subtype in paths_to_check:
1084
+ try:
1085
+ url = base.rstrip("/") + path
1086
+ req = urllib.request.Request(
1087
+ url,
1088
+ method="HEAD",
1089
+ headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
1090
+ )
1091
+
1092
+ try:
1093
+ with urllib.request.urlopen(
1094
+ req, timeout=timeout, context=ctx
1095
+ ) as response:
1096
+ status = response.getcode()
1097
+ except urllib.error.HTTPError as e:
1098
+ status = e.code
1099
+
1100
+ # Consider 2xx, 3xx, 401, 403 as "exists"
1101
+ if status in (200, 201, 204, 301, 302, 303, 307, 308, 401, 403):
1102
+ if category == "cms":
1103
+ if subtype not in cms_detected:
1104
+ cms_detected[subtype] = []
1105
+ cms_detected[subtype].append({"path": path, "status": status})
1106
+ elif category == "admin":
1107
+ result["admin_panels"].append(
1108
+ {
1109
+ "path": path,
1110
+ "name": subtype,
1111
+ "status": status,
1112
+ "url": url,
1113
+ }
1114
+ )
1115
+ elif category == "api":
1116
+ result["api_endpoints"].append(
1117
+ {
1118
+ "path": path,
1119
+ "type": subtype,
1120
+ "status": status,
1121
+ "url": url,
1122
+ }
1123
+ )
1124
+
1125
+ except Exception:
1126
+ # Timeout or connection error - skip this path
1127
+ continue
1128
+
1129
+ # Determine primary CMS (most path matches)
1130
+ if cms_detected:
1131
+ best_cms = max(cms_detected.items(), key=lambda x: len(x[1]))
1132
+ result["cms"] = {
1133
+ "name": best_cms[0],
1134
+ "paths": best_cms[1],
1135
+ "confidence": "high" if len(best_cms[1]) >= 2 else "medium",
1136
+ }
1137
+
1138
+ return result
534
1139
 
535
1140
  def _format_output(self, target: str, result: Dict[str, Any], label: str) -> str:
536
1141
  """Format fingerprint results for log output."""
@@ -539,33 +1144,50 @@ class HttpFingerprintPlugin(PluginBase):
539
1144
  lines.append(f"Target: {target}")
540
1145
  if label:
541
1146
  lines.append(f"Label: {label}")
542
- lines.append(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}")
1147
+ lines.append(
1148
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}"
1149
+ )
543
1150
  lines.append("=" * 60)
544
1151
  lines.append("")
545
1152
 
546
- if result.get('error'):
1153
+ if result.get("error"):
547
1154
  lines.append(f"ERROR: {result['error']}")
548
- return '\n'.join(lines)
1155
+ return "\n".join(lines)
1156
+
1157
+ # Protocol detection info (smart probe results)
1158
+ protocol_detection = result.get("protocol_detection")
1159
+ if protocol_detection in ("upgraded", "fallback"):
1160
+ lines.append("-" * 40)
1161
+ lines.append(f"PROTOCOL DETECTION: {protocol_detection.upper()}")
1162
+ if result.get("protocol_note"):
1163
+ lines.append(f" {result['protocol_note']}")
1164
+ if result.get("original_url"):
1165
+ lines.append(f" Original URL: {result['original_url']}")
1166
+ lines.append(f" Effective URL: {result.get('effective_url', target)}")
1167
+ lines.append("-" * 40)
1168
+ lines.append("")
549
1169
 
550
1170
  # Status
551
1171
  lines.append(f"HTTP Status: {result.get('status_code', 'N/A')}")
552
1172
 
553
- if result.get('redirect_url'):
1173
+ if result.get("redirect_url"):
554
1174
  lines.append(f"Redirected to: {result['redirect_url']}")
555
1175
 
556
1176
  # Server
557
- if result.get('server'):
1177
+ if result.get("server"):
558
1178
  lines.append(f"Server: {result['server']}")
559
1179
 
560
1180
  # TLS
561
- if result.get('tls'):
562
- tls = result['tls']
563
- lines.append(f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})")
1181
+ if result.get("tls"):
1182
+ tls = result["tls"]
1183
+ lines.append(
1184
+ f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})"
1185
+ )
564
1186
 
565
1187
  lines.append("")
566
1188
 
567
1189
  # Managed Hosting (most important for tool decisions)
568
- if result.get('managed_hosting'):
1190
+ if result.get("managed_hosting"):
569
1191
  lines.append("-" * 40)
570
1192
  lines.append(f"MANAGED HOSTING DETECTED: {result['managed_hosting']}")
571
1193
  lines.append(" -> CGI enumeration will be skipped")
@@ -574,29 +1196,53 @@ class HttpFingerprintPlugin(PluginBase):
574
1196
  lines.append("")
575
1197
 
576
1198
  # WAF
577
- if result.get('waf'):
1199
+ if result.get("waf"):
578
1200
  lines.append(f"WAF/Protection Detected:")
579
- for waf in result['waf']:
1201
+ for waf in result["waf"]:
580
1202
  lines.append(f" - {waf}")
581
1203
  lines.append("")
582
1204
 
583
1205
  # CDN
584
- if result.get('cdn'):
1206
+ if result.get("cdn"):
585
1207
  lines.append(f"CDN Detected:")
586
- for cdn in result['cdn']:
1208
+ for cdn in result["cdn"]:
587
1209
  lines.append(f" - {cdn}")
588
1210
  lines.append("")
589
1211
 
590
1212
  # Technologies
591
- if result.get('technologies'):
1213
+ if result.get("technologies"):
592
1214
  lines.append(f"Technologies:")
593
- for tech in result['technologies']:
1215
+ for tech in result["technologies"]:
594
1216
  lines.append(f" - {tech}")
595
1217
  lines.append("")
596
1218
 
597
- lines.append(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===")
1219
+ # Robots.txt paths (discovered files/directories)
1220
+ robots_paths = result.get("robots_paths", [])
1221
+ if robots_paths:
1222
+ lines.append("-" * 40)
1223
+ lines.append(f"ROBOTS.TXT PATHS ({len(robots_paths)} found):")
1224
+ for path in robots_paths[:20]:
1225
+ lines.append(f" - {path}")
1226
+ if len(robots_paths) > 20:
1227
+ lines.append(f" ... and {len(robots_paths) - 20} more")
1228
+ lines.append("-" * 40)
1229
+ lines.append("")
1230
+
1231
+ # Sitemap URLs
1232
+ sitemap_paths = result.get("sitemap_paths", [])
1233
+ if sitemap_paths:
1234
+ lines.append(f"SITEMAP URLS ({len(sitemap_paths)} found):")
1235
+ for url in sitemap_paths[:10]:
1236
+ lines.append(f" - {url}")
1237
+ if len(sitemap_paths) > 10:
1238
+ lines.append(f" ... and {len(sitemap_paths) - 10} more")
1239
+ lines.append("")
1240
+
1241
+ lines.append(
1242
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ==="
1243
+ )
598
1244
 
599
- return '\n'.join(lines)
1245
+ return "\n".join(lines)
600
1246
 
601
1247
 
602
1248
  plugin = HttpFingerprintPlugin()