souleyez 2.43.29__py3-none-any.whl → 2.43.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9526 -2879
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +563 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +408 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +371 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +854 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +173 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +223 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +22827 -10678
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/METADATA +1 -1
  353. souleyez-2.43.34.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/top_level.txt +0 -0
@@ -11,11 +11,12 @@ Detects:
11
11
 
12
12
  This runs BEFORE web vulnerability scanners to enable smarter tool configuration.
13
13
  """
14
+
14
15
  import json
15
- import time
16
- import ssl
17
16
  import socket
18
- from typing import Dict, Any, List, Optional
17
+ import ssl
18
+ import time
19
+ from typing import Any, Dict, List, Optional
19
20
  from urllib.parse import urlparse
20
21
 
21
22
  from .plugin_base import PluginBase
@@ -45,238 +46,342 @@ HELP = {
45
46
  "presets": [
46
47
  {"name": "Quick Fingerprint", "args": [], "desc": "Fast fingerprint scan"},
47
48
  ],
49
+ "help_sections": [
50
+ {
51
+ "title": "What is HTTP Fingerprinting?",
52
+ "color": "cyan",
53
+ "content": [
54
+ (
55
+ "Overview",
56
+ [
57
+ "Lightweight reconnaissance that identifies web infrastructure",
58
+ "Runs automatically before vulnerability scanners",
59
+ "Enables smarter tool configuration based on detected technology",
60
+ ],
61
+ ),
62
+ (
63
+ "What It Detects",
64
+ [
65
+ "Server software - Apache, nginx, IIS, LiteSpeed",
66
+ "WAFs - Cloudflare, Akamai, AWS WAF, Imperva, Sucuri",
67
+ "CDNs - Cloudflare, Fastly, CloudFront, Akamai",
68
+ "Managed hosting - Squarespace, Wix, Shopify, Netlify",
69
+ ],
70
+ ),
71
+ ],
72
+ },
73
+ {
74
+ "title": "Usage & Examples",
75
+ "color": "green",
76
+ "content": [
77
+ (
78
+ "Basic Usage",
79
+ [
80
+ "souleyez jobs enqueue http_fingerprint http://example.com",
81
+ "souleyez jobs enqueue http_fingerprint https://example.com",
82
+ " → Detects server, WAF, CDN, and hosting platform",
83
+ ],
84
+ ),
85
+ ],
86
+ },
87
+ {
88
+ "title": "Why This Matters",
89
+ "color": "yellow",
90
+ "content": [
91
+ (
92
+ "Smart Tool Configuration",
93
+ [
94
+ "If Squarespace detected → skip CGI enumeration (pointless)",
95
+ "If Cloudflare WAF detected → adjust scan rate to avoid blocks",
96
+ "If nginx detected → test nginx-specific vulnerabilities",
97
+ ],
98
+ ),
99
+ (
100
+ "Attack Surface Mapping",
101
+ [
102
+ "Managed platforms have limited attack surface",
103
+ "WAFs require evasion techniques or finding bypasses",
104
+ "CDNs may hide the real origin server IP",
105
+ ],
106
+ ),
107
+ ],
108
+ },
109
+ ],
48
110
  }
49
111
 
50
112
  # WAF detection signatures
51
113
  # Format: {header_name: {value_pattern: waf_name}}
52
114
  WAF_SIGNATURES = {
53
115
  # Header-based detection
54
- 'headers': {
55
- 'server': {
56
- 'cloudflare': 'Cloudflare',
57
- 'akamaighost': 'Akamai',
58
- 'akamainetworkstorage': 'Akamai',
59
- 'awselb': 'AWS ELB',
60
- 'bigip': 'F5 BIG-IP',
61
- 'barracuda': 'Barracuda',
62
- 'denyall': 'DenyAll',
63
- 'fortigate': 'Fortinet FortiGate',
64
- 'imperva': 'Imperva',
65
- 'incapsula': 'Imperva Incapsula',
66
- 'netscaler': 'Citrix NetScaler',
67
- 'sucuri': 'Sucuri',
68
- 'wallarm': 'Wallarm',
116
+ "headers": {
117
+ "server": {
118
+ "cloudflare": "Cloudflare",
119
+ "akamaighost": "Akamai",
120
+ "akamainetworkstorage": "Akamai",
121
+ "awselb": "AWS ELB",
122
+ "bigip": "F5 BIG-IP",
123
+ "barracuda": "Barracuda",
124
+ "denyall": "DenyAll",
125
+ "fortigate": "Fortinet FortiGate",
126
+ "imperva": "Imperva",
127
+ "incapsula": "Imperva Incapsula",
128
+ "netscaler": "Citrix NetScaler",
129
+ "sucuri": "Sucuri",
130
+ "wallarm": "Wallarm",
69
131
  },
70
- 'x-powered-by': {
71
- 'aws lambda': 'AWS Lambda',
72
- 'express': 'Express.js',
73
- 'php': 'PHP',
74
- 'asp.net': 'ASP.NET',
132
+ "x-powered-by": {
133
+ "aws lambda": "AWS Lambda",
134
+ "express": "Express.js",
135
+ "php": "PHP",
136
+ "asp.net": "ASP.NET",
75
137
  },
76
- 'x-sucuri-id': {'': 'Sucuri'},
77
- 'x-sucuri-cache': {'': 'Sucuri'},
78
- 'cf-ray': {'': 'Cloudflare'},
79
- 'cf-cache-status': {'': 'Cloudflare'},
80
- 'x-amz-cf-id': {'': 'AWS CloudFront'},
81
- 'x-amz-cf-pop': {'': 'AWS CloudFront'},
82
- 'x-akamai-transformed': {'': 'Akamai'},
83
- 'x-cache': {
84
- 'cloudfront': 'AWS CloudFront',
85
- 'varnish': 'Varnish',
138
+ "x-sucuri-id": {"": "Sucuri"},
139
+ "x-sucuri-cache": {"": "Sucuri"},
140
+ "cf-ray": {"": "Cloudflare"},
141
+ "cf-cache-status": {"": "Cloudflare"},
142
+ "x-amz-cf-id": {"": "AWS CloudFront"},
143
+ "x-amz-cf-pop": {"": "AWS CloudFront"},
144
+ "x-akamai-transformed": {"": "Akamai"},
145
+ "x-cache": {
146
+ "cloudfront": "AWS CloudFront",
147
+ "varnish": "Varnish",
86
148
  },
87
- 'x-fastly-request-id': {'': 'Fastly'},
88
- 'x-served-by': {
89
- 'cache-': 'Fastly',
149
+ "x-fastly-request-id": {"": "Fastly"},
150
+ "x-served-by": {
151
+ "cache-": "Fastly",
90
152
  },
91
- 'x-cdn': {
92
- 'incapsula': 'Imperva Incapsula',
93
- 'cloudflare': 'Cloudflare',
153
+ "x-cdn": {
154
+ "incapsula": "Imperva Incapsula",
155
+ "cloudflare": "Cloudflare",
94
156
  },
95
- 'x-iinfo': {'': 'Imperva Incapsula'},
96
- 'x-proxy-id': {'': 'Imperva'},
97
- 'x-request-id': {}, # Generic, but useful context
98
- 'x-fw-protection': {'': 'Unknown WAF'},
99
- 'x-protected-by': {'': 'Unknown WAF'},
100
- 'x-waf-status': {'': 'Unknown WAF'},
101
- 'x-denied-reason': {'': 'Unknown WAF'},
157
+ "x-iinfo": {"": "Imperva Incapsula"},
158
+ "x-proxy-id": {"": "Imperva"},
159
+ "x-request-id": {}, # Generic, but useful context
160
+ "x-fw-protection": {"": "Unknown WAF"},
161
+ "x-protected-by": {"": "Unknown WAF"},
162
+ "x-waf-status": {"": "Unknown WAF"},
163
+ "x-denied-reason": {"": "Unknown WAF"},
102
164
  },
103
165
  # Cookie-based detection
104
- 'cookies': {
105
- '__cfduid': 'Cloudflare',
106
- 'cf_clearance': 'Cloudflare',
107
- '__cf_bm': 'Cloudflare Bot Management',
108
- 'incap_ses': 'Imperva Incapsula',
109
- 'visid_incap': 'Imperva Incapsula',
110
- 'nlbi_': 'Imperva Incapsula',
111
- 'ak_bmsc': 'Akamai Bot Manager',
112
- 'bm_sz': 'Akamai Bot Manager',
113
- '_abck': 'Akamai Bot Manager',
114
- 'awsalb': 'AWS ALB',
115
- 'awsalbcors': 'AWS ALB',
116
- 'ts': 'F5 BIG-IP',
117
- 'bigipserver': 'F5 BIG-IP',
118
- 'citrix_ns_id': 'Citrix NetScaler',
119
- 'sucuri_cloudproxy': 'Sucuri',
166
+ "cookies": {
167
+ "__cfduid": "Cloudflare",
168
+ "cf_clearance": "Cloudflare",
169
+ "__cf_bm": "Cloudflare Bot Management",
170
+ "incap_ses": "Imperva Incapsula",
171
+ "visid_incap": "Imperva Incapsula",
172
+ "nlbi_": "Imperva Incapsula",
173
+ "ak_bmsc": "Akamai Bot Manager",
174
+ "bm_sz": "Akamai Bot Manager",
175
+ "_abck": "Akamai Bot Manager",
176
+ "awsalb": "AWS ALB",
177
+ "awsalbcors": "AWS ALB",
178
+ "ts": "F5 BIG-IP",
179
+ "bigipserver": "F5 BIG-IP",
180
+ "citrix_ns_id": "Citrix NetScaler",
181
+ "sucuri_cloudproxy": "Sucuri",
120
182
  },
121
183
  }
122
184
 
123
185
  # CDN detection signatures
124
186
  CDN_SIGNATURES = {
125
- 'headers': {
126
- 'cf-ray': 'Cloudflare',
127
- 'cf-cache-status': 'Cloudflare',
128
- 'x-amz-cf-id': 'AWS CloudFront',
129
- 'x-amz-cf-pop': 'AWS CloudFront',
130
- 'x-cache': {
131
- 'cloudfront': 'AWS CloudFront',
132
- 'hit from cloudfront': 'AWS CloudFront',
187
+ "headers": {
188
+ "cf-ray": "Cloudflare",
189
+ "cf-cache-status": "Cloudflare",
190
+ "x-amz-cf-id": "AWS CloudFront",
191
+ "x-amz-cf-pop": "AWS CloudFront",
192
+ "x-cache": {
193
+ "cloudfront": "AWS CloudFront",
194
+ "hit from cloudfront": "AWS CloudFront",
133
195
  },
134
- 'x-fastly-request-id': 'Fastly',
135
- 'x-served-by': 'Fastly',
136
- 'x-akamai-transformed': 'Akamai',
137
- 'x-akamai-request-id': 'Akamai',
138
- 'x-edge-location': 'Generic CDN',
139
- 'x-cdn': 'Generic CDN',
140
- 'x-cache-status': 'Generic CDN',
141
- 'x-varnish': 'Varnish',
142
- 'via': {
143
- 'cloudfront': 'AWS CloudFront',
144
- 'varnish': 'Varnish',
145
- 'akamai': 'Akamai',
196
+ "x-fastly-request-id": "Fastly",
197
+ "x-served-by": "Fastly",
198
+ "x-akamai-transformed": "Akamai",
199
+ "x-akamai-request-id": "Akamai",
200
+ "x-edge-location": "Generic CDN",
201
+ "x-cdn": "Generic CDN",
202
+ "x-cache-status": "Generic CDN",
203
+ "x-varnish": "Varnish",
204
+ "via": {
205
+ "cloudfront": "AWS CloudFront",
206
+ "varnish": "Varnish",
207
+ "akamai": "Akamai",
146
208
  },
147
- 'x-azure-ref': 'Azure CDN',
148
- 'x-msedge-ref': 'Azure CDN',
149
- 'x-goog-': 'Google Cloud CDN',
150
- 'x-bunny-': 'Bunny CDN',
151
- 'x-hw': 'Huawei CDN',
209
+ "x-azure-ref": "Azure CDN",
210
+ "x-msedge-ref": "Azure CDN",
211
+ "x-goog-": "Google Cloud CDN",
212
+ "x-bunny-": "Bunny CDN",
213
+ "x-hw": "Huawei CDN",
152
214
  },
153
- 'server': {
154
- 'cloudflare': 'Cloudflare',
155
- 'akamaighost': 'Akamai',
156
- 'cloudfront': 'AWS CloudFront',
157
- 'fastly': 'Fastly',
158
- 'varnish': 'Varnish',
159
- 'keycdn': 'KeyCDN',
160
- 'bunnycdn': 'Bunny CDN',
161
- 'cdn77': 'CDN77',
162
- 'stackpath': 'StackPath',
163
- 'limelight': 'Limelight',
164
- 'azure': 'Azure CDN',
215
+ "server": {
216
+ "cloudflare": "Cloudflare",
217
+ "akamaighost": "Akamai",
218
+ "cloudfront": "AWS CloudFront",
219
+ "fastly": "Fastly",
220
+ "varnish": "Varnish",
221
+ "keycdn": "KeyCDN",
222
+ "bunnycdn": "Bunny CDN",
223
+ "cdn77": "CDN77",
224
+ "stackpath": "StackPath",
225
+ "limelight": "Limelight",
226
+ "azure": "Azure CDN",
165
227
  },
166
228
  }
167
229
 
168
230
  # Managed hosting platform signatures
169
231
  MANAGED_HOSTING_SIGNATURES = {
170
- 'server': {
171
- 'squarespace': 'Squarespace',
172
- 'wix': 'Wix',
173
- 'shopify': 'Shopify',
174
- 'weebly': 'Weebly',
175
- 'webflow': 'Webflow',
176
- 'ghost': 'Ghost',
177
- 'medium': 'Medium',
178
- 'tumblr': 'Tumblr',
179
- 'blogger': 'Blogger/Blogspot',
180
- 'wordpress.com': 'WordPress.com',
181
- 'netlify': 'Netlify',
182
- 'vercel': 'Vercel',
183
- 'heroku': 'Heroku',
184
- 'github': 'GitHub Pages',
185
- 'gitlab': 'GitLab Pages',
186
- 'firebase': 'Firebase Hosting',
187
- 'render': 'Render',
188
- 'railway': 'Railway',
189
- 'fly': 'Fly.io',
190
- 'deno': 'Deno Deploy',
232
+ "server": {
233
+ "squarespace": "Squarespace",
234
+ "wix": "Wix",
235
+ "shopify": "Shopify",
236
+ "weebly": "Weebly",
237
+ "webflow": "Webflow",
238
+ "ghost": "Ghost",
239
+ "medium": "Medium",
240
+ "tumblr": "Tumblr",
241
+ "blogger": "Blogger/Blogspot",
242
+ "wordpress.com": "WordPress.com",
243
+ "netlify": "Netlify",
244
+ "vercel": "Vercel",
245
+ "heroku": "Heroku",
246
+ "github": "GitHub Pages",
247
+ "gitlab": "GitLab Pages",
248
+ "firebase": "Firebase Hosting",
249
+ "render": "Render",
250
+ "railway": "Railway",
251
+ "fly": "Fly.io",
252
+ "deno": "Deno Deploy",
191
253
  },
192
- 'headers': {
193
- 'x-shopify-stage': 'Shopify',
194
- 'x-shopify-request-id': 'Shopify',
195
- 'x-wix-request-id': 'Wix',
196
- 'x-wix-renderer-server': 'Wix',
197
- 'x-sqsp-edge': 'Squarespace',
198
- 'x-squarespace-': 'Squarespace',
199
- 'x-ghost-': 'Ghost',
200
- 'x-medium-content': 'Medium',
201
- 'x-tumblr-': 'Tumblr',
202
- 'x-blogger-': 'Blogger/Blogspot',
203
- 'x-netlify-': 'Netlify',
204
- 'x-nf-request-id': 'Netlify',
205
- 'x-vercel-': 'Vercel',
206
- 'x-vercel-id': 'Vercel',
207
- 'x-heroku-': 'Heroku',
208
- 'x-github-request-id': 'GitHub Pages',
209
- 'x-firebase-': 'Firebase Hosting',
210
- 'x-render-origin-server': 'Render',
211
- 'fly-request-id': 'Fly.io',
254
+ "headers": {
255
+ "x-shopify-stage": "Shopify",
256
+ "x-shopify-request-id": "Shopify",
257
+ "x-wix-request-id": "Wix",
258
+ "x-wix-renderer-server": "Wix",
259
+ "x-sqsp-edge": "Squarespace",
260
+ "x-squarespace-": "Squarespace",
261
+ "x-ghost-": "Ghost",
262
+ "x-medium-content": "Medium",
263
+ "x-tumblr-": "Tumblr",
264
+ "x-blogger-": "Blogger/Blogspot",
265
+ "x-netlify-": "Netlify",
266
+ "x-nf-request-id": "Netlify",
267
+ "x-vercel-": "Vercel",
268
+ "x-vercel-id": "Vercel",
269
+ "x-heroku-": "Heroku",
270
+ "x-github-request-id": "GitHub Pages",
271
+ "x-firebase-": "Firebase Hosting",
272
+ "x-render-origin-server": "Render",
273
+ "fly-request-id": "Fly.io",
212
274
  },
213
- 'cookies': {
214
- 'wordpress_': 'WordPress',
215
- 'wp-settings': 'WordPress',
216
- '_shopify_': 'Shopify',
217
- 'wixSession': 'Wix',
275
+ "cookies": {
276
+ "wordpress_": "WordPress",
277
+ "wp-settings": "WordPress",
278
+ "_shopify_": "Shopify",
279
+ "wixSession": "Wix",
218
280
  },
219
281
  }
220
282
 
221
283
  # Server software signatures
222
284
  SERVER_SIGNATURES = {
223
- 'apache': 'Apache',
224
- 'nginx': 'nginx',
225
- 'microsoft-iis': 'Microsoft IIS',
226
- 'iis': 'Microsoft IIS',
227
- 'lighttpd': 'lighttpd',
228
- 'litespeed': 'LiteSpeed',
229
- 'openresty': 'OpenResty',
230
- 'caddy': 'Caddy',
231
- 'tomcat': 'Apache Tomcat',
232
- 'jetty': 'Eclipse Jetty',
233
- 'gunicorn': 'Gunicorn',
234
- 'uvicorn': 'Uvicorn',
235
- 'werkzeug': 'Werkzeug (Flask)',
236
- 'waitress': 'Waitress',
237
- 'cowboy': 'Cowboy (Erlang)',
238
- 'kestrel': 'Kestrel (ASP.NET)',
239
- 'express': 'Express.js',
285
+ "apache": "Apache",
286
+ "nginx": "nginx",
287
+ "microsoft-iis": "Microsoft IIS",
288
+ "iis": "Microsoft IIS",
289
+ "lighttpd": "lighttpd",
290
+ "litespeed": "LiteSpeed",
291
+ "openresty": "OpenResty",
292
+ "caddy": "Caddy",
293
+ "tomcat": "Apache Tomcat",
294
+ "jetty": "Eclipse Jetty",
295
+ "gunicorn": "Gunicorn",
296
+ "uvicorn": "Uvicorn",
297
+ "werkzeug": "Werkzeug (Flask)",
298
+ "waitress": "Waitress",
299
+ "cowboy": "Cowboy (Erlang)",
300
+ "kestrel": "Kestrel (ASP.NET)",
301
+ "express": "Express.js",
240
302
  }
241
303
 
242
304
 
243
305
  class HttpFingerprintPlugin(PluginBase):
244
306
  name = "HTTP Fingerprint"
245
307
  tool = "http_fingerprint"
246
- category = "recon"
308
+ category = "scanning"
247
309
  HELP = HELP
248
310
 
249
- def build_command(self, target: str, args: List[str] = None, label: str = "", log_path: str = None):
311
+ def build_command(
312
+ self, target: str, args: List[str] = None, label: str = "", log_path: str = None
313
+ ):
250
314
  """
251
315
  HTTP fingerprinting is done in Python, not via external command.
252
316
  Return None to use run() method instead.
253
317
  """
254
318
  return None
255
319
 
256
- def run(self, target: str, args: List[str] = None, label: str = "", log_path: str = None) -> int:
257
- """Execute HTTP fingerprint scan."""
320
+ def run(
321
+ self, target: str, args: List[str] = None, label: str = "", log_path: str = None
322
+ ) -> int:
323
+ """Execute HTTP fingerprint scan with smart protocol detection."""
258
324
  args = args or []
259
325
  timeout = 10
260
326
 
261
327
  # Parse timeout from args
262
328
  for i, arg in enumerate(args):
263
- if arg == '--timeout' and i + 1 < len(args):
329
+ if arg == "--timeout" and i + 1 < len(args):
264
330
  try:
265
331
  timeout = int(args[i + 1])
266
332
  except ValueError:
267
333
  pass
268
334
 
269
335
  # Ensure target has scheme
270
- if not target.startswith(('http://', 'https://')):
271
- target = f'http://{target}'
336
+ if not target.startswith(("http://", "https://")):
337
+ target = f"http://{target}"
272
338
 
273
339
  try:
274
- result = self._fingerprint(target, timeout)
275
- output = self._format_output(target, result, label)
340
+ # Smart dual-probe: try both HTTP and HTTPS, use the better one
341
+ result, effective_url = self._smart_probe(target, timeout)
342
+ output = self._format_output(effective_url, result, label)
276
343
 
277
344
  if log_path:
278
- with open(log_path, 'a', encoding='utf-8', errors='replace') as fh:
345
+ with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
279
346
  fh.write(output)
347
+ # Fetch robots.txt and sitemap.xml for path discovery
348
+ robots_paths, sitemap_paths = self._fetch_robots_sitemap(
349
+ effective_url, timeout
350
+ )
351
+ result["robots_paths"] = robots_paths
352
+ result["sitemap_paths"] = sitemap_paths
353
+
354
+ # Quick path probing for CMS, admin panels, API endpoints
355
+ quick_probe = self._quick_path_probe(effective_url, timeout)
356
+ result["cms_detected"] = quick_probe.get("cms")
357
+ result["admin_panels"] = quick_probe.get("admin_panels", [])
358
+ result["api_endpoints"] = quick_probe.get("api_endpoints", [])
359
+
360
+ # Write additional detections to log
361
+ if quick_probe.get("cms"):
362
+ cms = quick_probe["cms"]
363
+ fh.write(f"\n{'=' * 40}\n")
364
+ fh.write(
365
+ f"CMS DETECTED: {cms['name']} ({cms['confidence']} confidence)\n"
366
+ )
367
+ for p in cms["paths"]:
368
+ fh.write(f" - {p['path']} (HTTP {p['status']})\n")
369
+ fh.write(f"{'=' * 40}\n")
370
+
371
+ if quick_probe.get("admin_panels"):
372
+ fh.write(f"\nADMIN PANELS FOUND:\n")
373
+ for panel in quick_probe["admin_panels"]:
374
+ fh.write(
375
+ f" - {panel['name']}: {panel['url']} (HTTP {panel['status']})\n"
376
+ )
377
+
378
+ if quick_probe.get("api_endpoints"):
379
+ fh.write(f"\nAPI ENDPOINTS FOUND:\n")
380
+ for api in quick_probe["api_endpoints"]:
381
+ fh.write(
382
+ f" - {api['type']}: {api['url']} (HTTP {api['status']})\n"
383
+ )
384
+
280
385
  # Write JSON result for parsing
281
386
  fh.write("\n\n=== JSON_RESULT ===\n")
282
387
  fh.write(json.dumps(result, indent=2))
@@ -290,11 +395,159 @@ class HttpFingerprintPlugin(PluginBase):
290
395
  error_output += f"Error: {type(e).__name__}: {e}\n"
291
396
 
292
397
  if log_path:
293
- with open(log_path, 'a', encoding='utf-8', errors='replace') as fh:
398
+ with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
294
399
  fh.write(error_output)
295
400
 
296
401
  return 1
297
402
 
403
def _smart_probe(self, target: str, timeout: int = 10) -> tuple:
    """
    Probe the target's own protocol and, if needed, the other one; keep the better result.

    The target URL is fingerprinted first. When that response is successful
    (2xx/3xx), error-free and scores at least 3 on the richness scale, it is
    returned immediately. Otherwise the same host and port are probed over
    the opposite scheme and the two results are compared.

    This handles cases where:
    - nmap reports HTTP but server is actually HTTPS
    - Server serves different content on HTTP vs HTTPS
    - HTTP redirects to HTTPS (or vice versa)

    The returned dict is annotated with ``protocol_detection``:
    - "primary":  the original URL was kept
    - "upgraded": both protocols answered and the alternate scored higher
    - "fallback": the original protocol errored but the alternate answered
    - "failed":   both probes errored (the primary result is returned)

    Args:
        target: Full URL to probe (http:// or https://).
        timeout: Per-request timeout in seconds, passed to ``_fingerprint``.

    Returns:
        tuple: (result_dict, effective_url)
    """
    parsed = urlparse(target)
    original_is_https = parsed.scheme == "https"
    host = parsed.hostname
    port = parsed.port or (443 if original_is_https else 80)
    primary_url = target

    # Probe the original protocol first
    primary_result = self._fingerprint(primary_url, timeout)
    primary_score = self._calculate_result_richness(primary_result)
    primary_status = primary_result.get("status_code") or 0
    primary_failed = bool(primary_result.get("error"))

    # Good enough to skip the alternate probe: successful status (2xx/3xx),
    # decent score, no error. 4xx/5xx may simply mean "wrong protocol".
    if 200 <= primary_status < 400 and primary_score >= 3 and not primary_failed:
        primary_result["protocol_detection"] = "primary"
        primary_result["effective_url"] = primary_url
        return primary_result, primary_url

    if not host:
        # No hostname to rebuild a URL from, so there is nothing else to try.
        primary_result["protocol_detection"] = (
            "failed" if primary_failed else "primary"
        )
        primary_result["effective_url"] = primary_url
        return primary_result, primary_url

    # urlparse() strips the brackets from IPv6 literals; put them back so
    # the port separator stays unambiguous in the rebuilt URL.
    if ":" in host:
        host = f"[{host}]"

    # Build the alternate URL: same host and port, opposite scheme. The port
    # is omitted only when it is the default for the *alternate* scheme.
    # Only scheme, host and port are carried over, not the target's path.
    if original_is_https:
        alternate_scheme, alternate_default, alternate_label = "http", 80, "HTTP"
    else:
        alternate_scheme, alternate_default, alternate_label = "https", 443, "HTTPS"
    if port == alternate_default:
        alternate_url = f"{alternate_scheme}://{host}"
    else:
        alternate_url = f"{alternate_scheme}://{host}:{port}"

    alternate_result = self._fingerprint(alternate_url, timeout)
    alternate_score = self._calculate_result_richness(alternate_result)
    alternate_failed = bool(alternate_result.get("error"))

    if primary_failed and alternate_failed:
        # Both failed, return primary with error
        primary_result["protocol_detection"] = "failed"
        primary_result["effective_url"] = primary_url
        return primary_result, primary_url

    if primary_failed:
        # Checked before the score comparison: an errored primary always
        # scores 0, so it would otherwise be reported as "upgraded".
        alternate_result["protocol_detection"] = "fallback"
        alternate_result["protocol_note"] = (
            f"Primary ({parsed.scheme.upper()}) failed, using {alternate_label}"
        )
        alternate_result["original_url"] = primary_url
        alternate_result["effective_url"] = alternate_url
        return alternate_result, alternate_url

    if not alternate_failed and alternate_score > primary_score:
        # Both answered; the alternate protocol gave the richer response
        alternate_result["protocol_detection"] = "upgraded"
        alternate_result["protocol_note"] = (
            f"Switched from {parsed.scheme.upper()} to {alternate_label} (richer response)"
        )
        alternate_result["original_url"] = primary_url
        alternate_result["effective_url"] = alternate_url
        return alternate_result, alternate_url

    # Primary is fine or equal
    primary_result["protocol_detection"] = "primary"
    primary_result["effective_url"] = primary_url
    return primary_result, primary_url
501
+
502
def _calculate_result_richness(self, result: Dict[str, Any]) -> int:
    """
    Calculate a "richness" score for a fingerprint result.

    Higher score = more useful/valid response. The score is the sum of:
    - status code: 200 -> 3; redirects and 401/403 -> 2; 404/500/502/503 -> 1
    - server header present -> 1
    - one point per detected technology
    - TLS info present -> 2
    - WAF detected -> 1, CDN detected -> 1
    - more than 5 response headers -> 2, at least one -> 1

    Args:
        result: Fingerprint dict as produced by ``_fingerprint``.

    Returns:
        int: Non-negative score; 0 when the result is empty or carries an error.
    """
    # Error (or nothing at all) = bad
    if not result or result.get("error"):
        return 0

    score = 0

    status = result.get("status_code")
    if status == 200:
        score += 3
    elif status in (301, 302, 303, 307, 308):
        score += 2  # Redirects are informative
    elif status in (401, 403):
        score += 2  # Auth required = real service
    elif status in (404, 500, 502, 503):
        score += 1  # At least it responded

    if result.get("server"):
        score += 1

    # `or ()` / `or {}`: the key may be present but set to None, in which
    # case dict.get()'s default is not used and len() would raise.
    score += len(result.get("technologies") or ())

    # TLS info means the HTTPS handshake worked
    if result.get("tls"):
        score += 2

    if result.get("waf"):
        score += 1
    if result.get("cdn"):
        score += 1

    # More headers = richer response
    header_count = len(result.get("headers") or {})
    if header_count > 5:
        score += 2
    elif header_count > 0:
        score += 1

    return score
550
+
298
551
  def _fingerprint(self, url: str, timeout: int = 10) -> Dict[str, Any]:
299
552
  """
300
553
  Perform HTTP fingerprinting on target URL.
@@ -308,46 +561,49 @@ class HttpFingerprintPlugin(PluginBase):
308
561
  - technologies: List of detected technologies
309
562
  - tls: TLS/SSL information (for HTTPS)
310
563
  """
311
- import urllib.request
312
564
  import urllib.error
565
+ import urllib.request
313
566
 
314
567
  result = {
315
- 'server': None,
316
- 'server_version': None,
317
- 'waf': [],
318
- 'cdn': [],
319
- 'managed_hosting': None,
320
- 'technologies': [],
321
- 'headers': {},
322
- 'cookies': [],
323
- 'tls': None,
324
- 'status_code': None,
325
- 'redirect_url': None,
568
+ "server": None,
569
+ "server_version": None,
570
+ "waf": [],
571
+ "cdn": [],
572
+ "managed_hosting": None,
573
+ "technologies": [],
574
+ "headers": {},
575
+ "cookies": [],
576
+ "tls": None,
577
+ "status_code": None,
578
+ "redirect_url": None,
326
579
  }
327
580
 
328
581
  parsed = urlparse(url)
329
582
 
330
583
  # Security: Only allow http/https schemes (B310 - prevent file:// or custom schemes)
331
- if parsed.scheme not in ('http', 'https'):
332
- result['error'] = f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed."
584
+ if parsed.scheme not in ("http", "https"):
585
+ result["error"] = (
586
+ f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed."
587
+ )
333
588
  return result
334
589
 
335
- is_https = parsed.scheme == 'https'
590
+ is_https = parsed.scheme == "https"
336
591
 
337
592
  # Check if target is an IP address (for special handling)
338
593
  import re
339
- is_ip_target = bool(re.match(r'^(\d{1,3}\.){3}\d{1,3}$', parsed.hostname or ''))
594
+
595
+ is_ip_target = bool(re.match(r"^(\d{1,3}\.){3}\d{1,3}$", parsed.hostname or ""))
340
596
 
341
597
  # Create request with common browser headers
342
598
  req = urllib.request.Request(
343
599
  url,
344
600
  headers={
345
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
346
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
347
- 'Accept-Language': 'en-US,en;q=0.5',
348
- 'Accept-Encoding': 'identity',
349
- 'Connection': 'close',
350
- }
601
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
602
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
603
+ "Accept-Language": "en-US,en;q=0.5",
604
+ "Accept-Encoding": "identity",
605
+ "Connection": "close",
606
+ },
351
607
  )
352
608
 
353
609
  # Always create SSL context with verification disabled
@@ -360,79 +616,89 @@ class HttpFingerprintPlugin(PluginBase):
360
616
  # Get TLS info for HTTPS targets
361
617
  if is_https:
362
618
  try:
363
- with socket.create_connection((parsed.hostname, parsed.port or 443), timeout=timeout) as sock:
364
- with ctx.wrap_socket(sock, server_hostname=parsed.hostname) as ssock:
619
+ with socket.create_connection(
620
+ (parsed.hostname, parsed.port or 443), timeout=timeout
621
+ ) as sock:
622
+ with ctx.wrap_socket(
623
+ sock, server_hostname=parsed.hostname
624
+ ) as ssock:
365
625
  cert = ssock.getpeercert(binary_form=True)
366
626
  cipher = ssock.cipher()
367
627
  version = ssock.version()
368
- result['tls'] = {
369
- 'version': version,
370
- 'cipher': cipher[0] if cipher else None,
371
- 'bits': cipher[2] if cipher else None,
628
+ result["tls"] = {
629
+ "version": version,
630
+ "cipher": cipher[0] if cipher else None,
631
+ "bits": cipher[2] if cipher else None,
372
632
  }
373
633
  except Exception:
374
634
  pass # TLS info is optional
375
635
 
376
636
  # Always pass SSL context (handles HTTP->HTTPS redirects)
377
- response = urllib.request.urlopen(req, timeout=timeout, context=ctx) # nosec B310 - scheme validated above
637
+ response = urllib.request.urlopen(
638
+ req, timeout=timeout, context=ctx
639
+ ) # nosec B310 - scheme validated above
378
640
 
379
- result['status_code'] = response.getcode()
641
+ result["status_code"] = response.getcode()
380
642
 
381
643
  # Get headers
382
644
  headers = {k.lower(): v for k, v in response.headers.items()}
383
- result['headers'] = dict(response.headers)
645
+ result["headers"] = dict(response.headers)
384
646
 
385
647
  # Check for redirect
386
648
  if response.geturl() != url:
387
- result['redirect_url'] = response.geturl()
649
+ result["redirect_url"] = response.geturl()
388
650
 
389
651
  # Parse cookies
390
- if 'set-cookie' in headers:
391
- cookies = headers.get('set-cookie', '')
392
- result['cookies'] = [c.strip() for c in cookies.split(',')]
652
+ if "set-cookie" in headers:
653
+ cookies = headers.get("set-cookie", "")
654
+ result["cookies"] = [c.strip() for c in cookies.split(",")]
393
655
 
394
656
  # Detect server
395
- server_header = headers.get('server', '').lower()
396
- result['server'] = headers.get('server')
657
+ server_header = headers.get("server", "").lower()
658
+ result["server"] = headers.get("server")
397
659
 
398
660
  for sig, name in SERVER_SIGNATURES.items():
399
661
  if sig in server_header:
400
- result['server_version'] = name
401
- result['technologies'].append(name)
662
+ result["server_version"] = name
663
+ result["technologies"].append(name)
402
664
  break
403
665
 
404
666
  # Detect WAF
405
- result['waf'] = self._detect_waf(headers, result['cookies'])
667
+ result["waf"] = self._detect_waf(headers, result["cookies"])
406
668
 
407
669
  # Detect CDN
408
- result['cdn'] = self._detect_cdn(headers, server_header)
670
+ result["cdn"] = self._detect_cdn(headers, server_header)
409
671
 
410
672
  # Detect managed hosting
411
- result['managed_hosting'] = self._detect_managed_hosting(headers, server_header, result['cookies'])
673
+ result["managed_hosting"] = self._detect_managed_hosting(
674
+ headers, server_header, result["cookies"]
675
+ )
412
676
 
413
677
  # Detect technologies from headers
414
678
  self._detect_technologies(headers, result)
415
679
 
416
680
  except urllib.error.HTTPError as e:
417
681
  # Even errors give us useful headers
418
- result['status_code'] = e.code
682
+ result["status_code"] = e.code
419
683
  headers = {k.lower(): v for k, v in e.headers.items()}
420
- result['headers'] = dict(e.headers)
421
- result['server'] = headers.get('server')
684
+ result["headers"] = dict(e.headers)
685
+ result["server"] = headers.get("server")
422
686
 
423
- server_header = headers.get('server', '').lower()
424
- result['waf'] = self._detect_waf(headers, [])
425
- result['cdn'] = self._detect_cdn(headers, server_header)
426
- result['managed_hosting'] = self._detect_managed_hosting(headers, server_header, [])
687
+ server_header = headers.get("server", "").lower()
688
+ result["waf"] = self._detect_waf(headers, [])
689
+ result["cdn"] = self._detect_cdn(headers, server_header)
690
+ result["managed_hosting"] = self._detect_managed_hosting(
691
+ headers, server_header, []
692
+ )
427
693
 
428
694
  except urllib.error.URLError as e:
429
- result['error'] = str(e.reason)
695
+ result["error"] = str(e.reason)
430
696
 
431
697
  except socket.timeout:
432
- result['error'] = 'Connection timed out'
698
+ result["error"] = "Connection timed out"
433
699
 
434
700
  except Exception as e:
435
- result['error'] = f'{type(e).__name__}: {e}'
701
+ result["error"] = f"{type(e).__name__}: {e}"
436
702
 
437
703
  return result
438
704
 
@@ -441,20 +707,20 @@ class HttpFingerprintPlugin(PluginBase):
441
707
  detected = []
442
708
 
443
709
  # Check headers
444
- for header, signatures in WAF_SIGNATURES['headers'].items():
445
- header_val = headers.get(header, '').lower()
710
+ for header, signatures in WAF_SIGNATURES["headers"].items():
711
+ header_val = headers.get(header, "").lower()
446
712
  if header_val:
447
713
  if isinstance(signatures, dict):
448
714
  for sig, waf_name in signatures.items():
449
- if sig == '' or sig in header_val:
715
+ if sig == "" or sig in header_val:
450
716
  if waf_name and waf_name not in detected:
451
717
  detected.append(waf_name)
452
718
  elif isinstance(signatures, str) and signatures not in detected:
453
719
  detected.append(signatures)
454
720
 
455
721
  # Check cookies
456
- cookie_str = ' '.join(cookies).lower()
457
- for cookie_sig, waf_name in WAF_SIGNATURES['cookies'].items():
722
+ cookie_str = " ".join(cookies).lower()
723
+ for cookie_sig, waf_name in WAF_SIGNATURES["cookies"].items():
458
724
  if cookie_sig.lower() in cookie_str:
459
725
  if waf_name not in detected:
460
726
  detected.append(waf_name)
@@ -466,8 +732,8 @@ class HttpFingerprintPlugin(PluginBase):
466
732
  detected = []
467
733
 
468
734
  # Check specific headers
469
- for header, cdn_info in CDN_SIGNATURES['headers'].items():
470
- header_val = headers.get(header, '').lower()
735
+ for header, cdn_info in CDN_SIGNATURES["headers"].items():
736
+ header_val = headers.get(header, "").lower()
471
737
  if header_val:
472
738
  if isinstance(cdn_info, dict):
473
739
  for sig, cdn_name in cdn_info.items():
@@ -477,28 +743,30 @@ class HttpFingerprintPlugin(PluginBase):
477
743
  detected.append(cdn_info)
478
744
 
479
745
  # Check server header
480
- for sig, cdn_name in CDN_SIGNATURES['server'].items():
746
+ for sig, cdn_name in CDN_SIGNATURES["server"].items():
481
747
  if sig in server_header and cdn_name not in detected:
482
748
  detected.append(cdn_name)
483
749
 
484
750
  return detected
485
751
 
486
- def _detect_managed_hosting(self, headers: Dict[str, str], server_header: str, cookies: List[str]) -> Optional[str]:
752
+ def _detect_managed_hosting(
753
+ self, headers: Dict[str, str], server_header: str, cookies: List[str]
754
+ ) -> Optional[str]:
487
755
  """Detect managed hosting platform."""
488
756
  # Check server header first (most reliable)
489
- for sig, platform in MANAGED_HOSTING_SIGNATURES['server'].items():
757
+ for sig, platform in MANAGED_HOSTING_SIGNATURES["server"].items():
490
758
  if sig in server_header:
491
759
  return platform
492
760
 
493
761
  # Check specific headers
494
- for header_prefix, platform in MANAGED_HOSTING_SIGNATURES['headers'].items():
762
+ for header_prefix, platform in MANAGED_HOSTING_SIGNATURES["headers"].items():
495
763
  for header in headers:
496
764
  if header.lower().startswith(header_prefix.lower()):
497
765
  return platform
498
766
 
499
767
  # Check cookies
500
- cookie_str = ' '.join(cookies).lower()
501
- for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES['cookies'].items():
768
+ cookie_str = " ".join(cookies).lower()
769
+ for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES["cookies"].items():
502
770
  if cookie_sig.lower() in cookie_str:
503
771
  return platform
504
772
 
@@ -506,31 +774,309 @@ class HttpFingerprintPlugin(PluginBase):
506
774
 
507
775
  def _detect_technologies(self, headers: Dict[str, str], result: Dict[str, Any]):
508
776
  """Detect additional technologies from headers."""
509
- techs = result['technologies']
777
+ techs = result["technologies"]
510
778
 
511
779
  # X-Powered-By
512
- powered_by = headers.get('x-powered-by', '')
780
+ powered_by = headers.get("x-powered-by", "")
513
781
  if powered_by:
514
- if 'php' in powered_by.lower():
515
- techs.append(f'PHP ({powered_by})')
516
- elif 'asp.net' in powered_by.lower():
517
- techs.append(f'ASP.NET ({powered_by})')
518
- elif 'express' in powered_by.lower():
519
- techs.append('Express.js')
782
+ if "php" in powered_by.lower():
783
+ techs.append(f"PHP ({powered_by})")
784
+ elif "asp.net" in powered_by.lower():
785
+ techs.append(f"ASP.NET ({powered_by})")
786
+ elif "express" in powered_by.lower():
787
+ techs.append("Express.js")
520
788
  elif powered_by not in techs:
521
789
  techs.append(powered_by)
522
790
 
523
791
  # X-AspNet-Version
524
- aspnet_ver = headers.get('x-aspnet-version', '')
792
+ aspnet_ver = headers.get("x-aspnet-version", "")
525
793
  if aspnet_ver:
526
- techs.append(f'ASP.NET {aspnet_ver}')
794
+ techs.append(f"ASP.NET {aspnet_ver}")
527
795
 
528
796
  # X-Generator
529
- generator = headers.get('x-generator', '')
797
+ generator = headers.get("x-generator", "")
530
798
  if generator:
531
799
  techs.append(generator)
532
800
 
533
- result['technologies'] = list(set(techs))
801
+ result["technologies"] = list(set(techs))
802
+
803
def _fetch_robots_sitemap(self, base_url: str, timeout: int = 10) -> tuple:
    """
    Fetch robots.txt and sitemap.xml to extract paths for discovery.

    This runs early in the recon chain so discovered paths can trigger
    follow-up scans even if gobuster's wordlist doesn't include them.

    Both fetches are best-effort: any network or parse failure yields an
    empty list for that source rather than an exception. Only http/https
    URLs are ever requested, including sitemap locations named inside
    robots.txt, which is server-controlled content.

    Args:
        base_url: Any URL on the target; only scheme and netloc are used.
        timeout: Per-request timeout in seconds.

    Returns:
        tuple: (robots_paths, sitemap_paths) - lists of discovered URLs.
        ``sitemap_paths`` holds at most 50 ``<loc>`` values taken from at
        most 3 sitemap documents.
    """
    import re
    import urllib.error
    import urllib.request
    from urllib.parse import urljoin

    try:
        import defusedxml.ElementTree as ElementTree
    except ImportError:
        import xml.etree.ElementTree as ElementTree

    allowed_schemes = ("http", "https")

    parsed = urlparse(base_url)
    # Security: same restriction as _fingerprint (B310) - never let urlopen
    # see file:// or other custom schemes.
    if parsed.scheme not in allowed_schemes or not parsed.netloc:
        return [], []

    site_root = f"{parsed.scheme}://{parsed.netloc}/"

    robots_paths = []
    sitemap_paths = []

    # Create SSL context for self-signed certs
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    def fetch_text(url):
        """Return the decoded body of a 200 response, or None otherwise."""
        req = urllib.request.Request(
            url,
            headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
        )
        with urllib.request.urlopen(  # nosec B310 - scheme validated by callers
            req, timeout=timeout, context=ctx
        ) as response:
            if response.getcode() != 200:
                return None
            return response.read().decode("utf-8", errors="replace")

    # Known directives to skip when looking for bare file names
    known_directives = (
        "user-agent:",
        "disallow:",
        "allow:",
        "sitemap:",
        "crawl-delay:",
        "host:",
        "request-rate:",
    )

    # === Fetch robots.txt ===
    try:
        content = fetch_text(urljoin(site_root, "robots.txt")) or ""
        for raw_line in content.splitlines():
            # '#' starts a comment anywhere on the line, not only at column 0
            line = raw_line.split("#", 1)[0].strip()
            if not line:
                continue

            line_lower = line.lower()

            # Extract Disallow/Allow paths (skip wildcards and query patterns)
            if line_lower.startswith(("disallow:", "allow:")):
                path = line.partition(":")[2].strip()
                if path and path != "/" and "*" not in path and "?" not in path:
                    full_url = urljoin(site_root, path.lstrip("/"))
                    if full_url not in robots_paths:
                        robots_paths.append(full_url)

            # Extract Sitemap URLs
            elif line_lower.startswith("sitemap:"):
                sitemap_url = line.partition(":")[2].strip()
                if not sitemap_url:
                    continue  # "Sitemap:" with no value
                if sitemap_url.startswith("//"):
                    # Protocol-relative: inherit the scheme we are using
                    sitemap_url = f"{parsed.scheme}:{sitemap_url}"
                elif not sitemap_url.lower().startswith(("http://", "https://")):
                    sitemap_url = urljoin(site_root, sitemap_url.lstrip("/"))
                # urljoin() returns absolute inputs such as "file:///..."
                # unchanged, so the scheme must be re-checked afterwards.
                if urlparse(sitemap_url).scheme not in allowed_schemes:
                    continue
                if sitemap_url not in sitemap_paths:
                    sitemap_paths.append(sitemap_url)

            # Extract bare file paths (CTF-style hints like "key-1-of-3.txt")
            elif not line_lower.startswith(known_directives):
                # Must look like a file with extension
                if re.match(r"^[\w\-./]+\.\w{1,5}$", line):
                    full_url = urljoin(site_root, line.lstrip("/"))
                    if full_url not in robots_paths:
                        robots_paths.append(full_url)

    except Exception:
        pass  # robots.txt fetch is optional

    # === Fetch sitemap.xml (if not found in robots.txt) ===
    if not sitemap_paths:
        sitemap_paths.append(urljoin(site_root, "sitemap.xml"))

    # Try to parse each sitemap
    all_sitemap_urls = []
    for sitemap_url in sitemap_paths[:3]:  # Limit to first 3 sitemaps
        try:
            content = fetch_text(sitemap_url)
            if content is None:
                continue

            try:
                root = ElementTree.fromstring(content)
                ns = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}
                # Namespaced <loc> first; fall back to bare <loc> for this
                # document only, independent of earlier sitemaps.
                nodes = root.findall(".//sm:loc", ns) or root.findall(".//loc")
                candidates = [node.text for node in nodes]
            except ElementTree.ParseError:
                # Fallback to regex
                candidates = re.findall(r"<loc>([^<]+)</loc>", content)

            for candidate in candidates:
                # Strip before the duplicate check so padded entries dedupe
                location = (candidate or "").strip()
                if location and location not in all_sitemap_urls:
                    all_sitemap_urls.append(location)

        except Exception:
            pass  # sitemap fetch is optional

    # Report the URLs found inside the sitemaps (limit to 50); empty when no
    # sitemap existed or none could be parsed.
    return robots_paths, all_sitemap_urls[:50]
956
+
957
def _quick_path_probe(self, base_url: str, timeout: int = 10) -> Dict[str, Any]:
    """
    Quick path probing for CMS detection, admin panels, and API indicators.

    Sends one HEAD request per well-known path to minimize bandwidth and
    noise. A path counts as existing when the final status is one of
    200/201/204, 301/302/303/307/308, 401 or 403. Paths that time out or
    fail to connect are skipped silently.

    NOTE(review): urlopen follows redirects by default, so the 3xx codes
    are presumably only seen when a redirect could not be followed - confirm.

    Args:
        base_url: Any URL on the target; only scheme and netloc are used.
            Non-http(s) URLs are rejected without sending any request.
        timeout: Per-request timeout in seconds.

    Returns:
        dict: {
            'cms': {'name': str, 'paths': [{'path': str, 'status': int}],
                    'confidence': 'high' | 'medium'} or None,
            'admin_panels': [{'path': str, 'name': str, 'status': int, 'url': str}],
            'api_endpoints': [{'path': str, 'type': str, 'status': int, 'url': str}]
        }
    """
    import urllib.error
    import urllib.request

    result = {"cms": None, "admin_panels": [], "api_endpoints": []}

    parsed = urlparse(base_url)
    # Security: same restriction as _fingerprint (B310) - never let urlopen
    # see file:// or other custom schemes.
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        return result

    base = f"{parsed.scheme}://{parsed.netloc}".rstrip("/")

    # Create SSL context for self-signed certs
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # Format: (path, category, subcategory/type)
    paths_to_check = (
        # CMS Detection
        ("/wp-admin/", "cms", "WordPress"),
        ("/wp-login.php", "cms", "WordPress"),
        ("/wp-includes/", "cms", "WordPress"),
        ("/administrator/", "cms", "Joomla"),
        ("/components/com_content/", "cms", "Joomla"),
        ("/user/login", "cms", "Drupal"),
        ("/core/misc/drupal.js", "cms", "Drupal"),
        ("/typo3/", "cms", "TYPO3"),
        ("/sitecore/", "cms", "Sitecore"),
        # Admin Panels
        ("/phpmyadmin/", "admin", "phpMyAdmin"),
        ("/pma/", "admin", "phpMyAdmin"),
        ("/admin/", "admin", "Admin Panel"),
        ("/admin/login", "admin", "Admin Login"),
        ("/login/", "admin", "Login Page"),
        ("/login.php", "admin", "Login Page"),
        ("/manager/", "admin", "Manager"),
        ("/cpanel/", "admin", "cPanel"),
        ("/webmail/", "admin", "Webmail"),
        # API Indicators
        ("/api/", "api", "REST API"),
        ("/api/v1/", "api", "REST API v1"),
        ("/api/v2/", "api", "REST API v2"),
        ("/graphql", "api", "GraphQL"),
        ("/graphql/", "api", "GraphQL"),
        ("/swagger.json", "api", "Swagger/OpenAPI"),
        ("/swagger/", "api", "Swagger UI"),
        ("/openapi.json", "api", "OpenAPI"),
        ("/api-docs/", "api", "API Docs"),
        ("/v1/", "api", "API v1"),
        ("/rest/", "api", "REST API"),
    )

    # Consider 2xx, 3xx, 401, 403 as "exists"
    exists_statuses = frozenset((200, 201, 204, 301, 302, 303, 307, 308, 401, 403))

    # CMS name -> list of matching path hits, in probe order
    cms_detected = {}

    for path, category, subtype in paths_to_check:
        url = base + path
        try:
            req = urllib.request.Request(
                url,
                method="HEAD",
                headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
            )
            try:
                with urllib.request.urlopen(  # nosec B310 - scheme validated above
                    req, timeout=timeout, context=ctx
                ) as response:
                    status = response.getcode()
            except urllib.error.HTTPError as e:
                # 4xx/5xx still tell us whether the path exists (401/403)
                status = e.code
        except Exception:
            # Timeout or connection error - skip this path
            continue

        if status not in exists_statuses:
            continue

        if category == "cms":
            cms_detected.setdefault(subtype, []).append(
                {"path": path, "status": status}
            )
        elif category == "admin":
            result["admin_panels"].append(
                {"path": path, "name": subtype, "status": status, "url": url}
            )
        elif category == "api":
            result["api_endpoints"].append(
                {"path": path, "type": subtype, "status": status, "url": url}
            )

    # Determine primary CMS (most path matches; first probed wins a tie)
    if cms_detected:
        cms_name, cms_hits = max(cms_detected.items(), key=lambda item: len(item[1]))
        result["cms"] = {
            "name": cms_name,
            "paths": cms_hits,
            "confidence": "high" if len(cms_hits) >= 2 else "medium",
        }

    return result
534
1080
 
535
1081
  def _format_output(self, target: str, result: Dict[str, Any], label: str) -> str:
536
1082
  """Format fingerprint results for log output."""
@@ -539,33 +1085,50 @@ class HttpFingerprintPlugin(PluginBase):
539
1085
  lines.append(f"Target: {target}")
540
1086
  if label:
541
1087
  lines.append(f"Label: {label}")
542
- lines.append(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}")
1088
+ lines.append(
1089
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}"
1090
+ )
543
1091
  lines.append("=" * 60)
544
1092
  lines.append("")
545
1093
 
546
- if result.get('error'):
1094
+ if result.get("error"):
547
1095
  lines.append(f"ERROR: {result['error']}")
548
- return '\n'.join(lines)
1096
+ return "\n".join(lines)
1097
+
1098
+ # Protocol detection info (smart probe results)
1099
+ protocol_detection = result.get("protocol_detection")
1100
+ if protocol_detection in ("upgraded", "fallback"):
1101
+ lines.append("-" * 40)
1102
+ lines.append(f"PROTOCOL DETECTION: {protocol_detection.upper()}")
1103
+ if result.get("protocol_note"):
1104
+ lines.append(f" {result['protocol_note']}")
1105
+ if result.get("original_url"):
1106
+ lines.append(f" Original URL: {result['original_url']}")
1107
+ lines.append(f" Effective URL: {result.get('effective_url', target)}")
1108
+ lines.append("-" * 40)
1109
+ lines.append("")
549
1110
 
550
1111
  # Status
551
1112
  lines.append(f"HTTP Status: {result.get('status_code', 'N/A')}")
552
1113
 
553
- if result.get('redirect_url'):
1114
+ if result.get("redirect_url"):
554
1115
  lines.append(f"Redirected to: {result['redirect_url']}")
555
1116
 
556
1117
  # Server
557
- if result.get('server'):
1118
+ if result.get("server"):
558
1119
  lines.append(f"Server: {result['server']}")
559
1120
 
560
1121
  # TLS
561
- if result.get('tls'):
562
- tls = result['tls']
563
- lines.append(f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})")
1122
+ if result.get("tls"):
1123
+ tls = result["tls"]
1124
+ lines.append(
1125
+ f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})"
1126
+ )
564
1127
 
565
1128
  lines.append("")
566
1129
 
567
1130
  # Managed Hosting (most important for tool decisions)
568
- if result.get('managed_hosting'):
1131
+ if result.get("managed_hosting"):
569
1132
  lines.append("-" * 40)
570
1133
  lines.append(f"MANAGED HOSTING DETECTED: {result['managed_hosting']}")
571
1134
  lines.append(" -> CGI enumeration will be skipped")
@@ -574,29 +1137,53 @@ class HttpFingerprintPlugin(PluginBase):
574
1137
  lines.append("")
575
1138
 
576
1139
  # WAF
577
- if result.get('waf'):
1140
+ if result.get("waf"):
578
1141
  lines.append(f"WAF/Protection Detected:")
579
- for waf in result['waf']:
1142
+ for waf in result["waf"]:
580
1143
  lines.append(f" - {waf}")
581
1144
  lines.append("")
582
1145
 
583
1146
  # CDN
584
- if result.get('cdn'):
1147
+ if result.get("cdn"):
585
1148
  lines.append(f"CDN Detected:")
586
- for cdn in result['cdn']:
1149
+ for cdn in result["cdn"]:
587
1150
  lines.append(f" - {cdn}")
588
1151
  lines.append("")
589
1152
 
590
1153
  # Technologies
591
- if result.get('technologies'):
1154
+ if result.get("technologies"):
592
1155
  lines.append(f"Technologies:")
593
- for tech in result['technologies']:
1156
+ for tech in result["technologies"]:
594
1157
  lines.append(f" - {tech}")
595
1158
  lines.append("")
596
1159
 
597
- lines.append(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===")
1160
+ # Robots.txt paths (discovered files/directories)
1161
+ robots_paths = result.get("robots_paths", [])
1162
+ if robots_paths:
1163
+ lines.append("-" * 40)
1164
+ lines.append(f"ROBOTS.TXT PATHS ({len(robots_paths)} found):")
1165
+ for path in robots_paths[:20]:
1166
+ lines.append(f" - {path}")
1167
+ if len(robots_paths) > 20:
1168
+ lines.append(f" ... and {len(robots_paths) - 20} more")
1169
+ lines.append("-" * 40)
1170
+ lines.append("")
1171
+
1172
+ # Sitemap URLs
1173
+ sitemap_paths = result.get("sitemap_paths", [])
1174
+ if sitemap_paths:
1175
+ lines.append(f"SITEMAP URLS ({len(sitemap_paths)} found):")
1176
+ for url in sitemap_paths[:10]:
1177
+ lines.append(f" - {url}")
1178
+ if len(sitemap_paths) > 10:
1179
+ lines.append(f" ... and {len(sitemap_paths) - 10} more")
1180
+ lines.append("")
1181
+
1182
+ lines.append(
1183
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ==="
1184
+ )
598
1185
 
599
- return '\n'.join(lines)
1186
+ return "\n".join(lines)
600
1187
 
601
1188
 
602
1189
  plugin = HttpFingerprintPlugin()