souleyez 2.43.29__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- souleyez/__init__.py +1 -2
- souleyez/ai/__init__.py +21 -15
- souleyez/ai/action_mapper.py +249 -150
- souleyez/ai/chain_advisor.py +116 -100
- souleyez/ai/claude_provider.py +29 -28
- souleyez/ai/context_builder.py +80 -62
- souleyez/ai/executor.py +158 -117
- souleyez/ai/feedback_handler.py +136 -121
- souleyez/ai/llm_factory.py +27 -20
- souleyez/ai/llm_provider.py +4 -2
- souleyez/ai/ollama_provider.py +6 -9
- souleyez/ai/ollama_service.py +44 -37
- souleyez/ai/path_scorer.py +91 -76
- souleyez/ai/recommender.py +176 -144
- souleyez/ai/report_context.py +74 -73
- souleyez/ai/report_service.py +84 -66
- souleyez/ai/result_parser.py +222 -229
- souleyez/ai/safety.py +67 -44
- souleyez/auth/__init__.py +23 -22
- souleyez/auth/audit.py +36 -26
- souleyez/auth/engagement_access.py +65 -48
- souleyez/auth/permissions.py +14 -3
- souleyez/auth/session_manager.py +54 -37
- souleyez/auth/user_manager.py +109 -64
- souleyez/commands/audit.py +40 -43
- souleyez/commands/auth.py +35 -15
- souleyez/commands/deliverables.py +55 -50
- souleyez/commands/engagement.py +47 -28
- souleyez/commands/license.py +32 -23
- souleyez/commands/screenshots.py +36 -32
- souleyez/commands/user.py +82 -36
- souleyez/config.py +52 -44
- souleyez/core/credential_tester.py +87 -81
- souleyez/core/cve_mappings.py +179 -192
- souleyez/core/cve_matcher.py +162 -148
- souleyez/core/msf_auto_mapper.py +100 -83
- souleyez/core/msf_chain_engine.py +294 -256
- souleyez/core/msf_database.py +153 -70
- souleyez/core/msf_integration.py +679 -673
- souleyez/core/msf_rpc_client.py +40 -42
- souleyez/core/msf_rpc_manager.py +77 -79
- souleyez/core/msf_sync_manager.py +241 -181
- souleyez/core/network_utils.py +22 -15
- souleyez/core/parser_handler.py +34 -25
- souleyez/core/pending_chains.py +114 -63
- souleyez/core/templates.py +158 -107
- souleyez/core/tool_chaining.py +9564 -2881
- souleyez/core/version_utils.py +79 -94
- souleyez/core/vuln_correlation.py +136 -89
- souleyez/core/web_utils.py +33 -32
- souleyez/data/wordlists/ad_users.txt +378 -0
- souleyez/data/wordlists/api_endpoints_large.txt +769 -0
- souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
- souleyez/data/wordlists/lfi_payloads.txt +82 -0
- souleyez/data/wordlists/passwords_brute.txt +1548 -0
- souleyez/data/wordlists/passwords_crack.txt +2479 -0
- souleyez/data/wordlists/passwords_spray.txt +386 -0
- souleyez/data/wordlists/subdomains_large.txt +5057 -0
- souleyez/data/wordlists/usernames_common.txt +694 -0
- souleyez/data/wordlists/web_dirs_large.txt +4769 -0
- souleyez/detection/__init__.py +1 -1
- souleyez/detection/attack_signatures.py +12 -17
- souleyez/detection/mitre_mappings.py +61 -55
- souleyez/detection/validator.py +97 -86
- souleyez/devtools.py +23 -10
- souleyez/docs/README.md +4 -4
- souleyez/docs/api-reference/cli-commands.md +2 -2
- souleyez/docs/developer-guide/adding-new-tools.md +562 -0
- souleyez/docs/user-guide/auto-chaining.md +30 -8
- souleyez/docs/user-guide/getting-started.md +1 -1
- souleyez/docs/user-guide/installation.md +26 -3
- souleyez/docs/user-guide/metasploit-integration.md +2 -2
- souleyez/docs/user-guide/rbac.md +1 -1
- souleyez/docs/user-guide/scope-management.md +1 -1
- souleyez/docs/user-guide/siem-integration.md +1 -1
- souleyez/docs/user-guide/tools-reference.md +1 -8
- souleyez/docs/user-guide/worker-management.md +1 -1
- souleyez/engine/background.py +1239 -535
- souleyez/engine/base.py +4 -1
- souleyez/engine/job_status.py +17 -49
- souleyez/engine/log_sanitizer.py +103 -77
- souleyez/engine/manager.py +38 -7
- souleyez/engine/result_handler.py +2200 -1550
- souleyez/engine/worker_manager.py +50 -41
- souleyez/export/evidence_bundle.py +72 -62
- souleyez/feature_flags/features.py +16 -20
- souleyez/feature_flags.py +5 -9
- souleyez/handlers/__init__.py +11 -0
- souleyez/handlers/base.py +188 -0
- souleyez/handlers/bash_handler.py +277 -0
- souleyez/handlers/bloodhound_handler.py +243 -0
- souleyez/handlers/certipy_handler.py +311 -0
- souleyez/handlers/crackmapexec_handler.py +486 -0
- souleyez/handlers/dnsrecon_handler.py +344 -0
- souleyez/handlers/enum4linux_handler.py +400 -0
- souleyez/handlers/evil_winrm_handler.py +493 -0
- souleyez/handlers/ffuf_handler.py +815 -0
- souleyez/handlers/gobuster_handler.py +1114 -0
- souleyez/handlers/gpp_extract_handler.py +334 -0
- souleyez/handlers/hashcat_handler.py +444 -0
- souleyez/handlers/hydra_handler.py +564 -0
- souleyez/handlers/impacket_getuserspns_handler.py +343 -0
- souleyez/handlers/impacket_psexec_handler.py +222 -0
- souleyez/handlers/impacket_secretsdump_handler.py +426 -0
- souleyez/handlers/john_handler.py +286 -0
- souleyez/handlers/katana_handler.py +425 -0
- souleyez/handlers/kerbrute_handler.py +298 -0
- souleyez/handlers/ldapsearch_handler.py +636 -0
- souleyez/handlers/lfi_extract_handler.py +464 -0
- souleyez/handlers/msf_auxiliary_handler.py +409 -0
- souleyez/handlers/msf_exploit_handler.py +380 -0
- souleyez/handlers/nikto_handler.py +413 -0
- souleyez/handlers/nmap_handler.py +821 -0
- souleyez/handlers/nuclei_handler.py +359 -0
- souleyez/handlers/nxc_handler.py +417 -0
- souleyez/handlers/rdp_sec_check_handler.py +353 -0
- souleyez/handlers/registry.py +292 -0
- souleyez/handlers/responder_handler.py +232 -0
- souleyez/handlers/service_explorer_handler.py +434 -0
- souleyez/handlers/smbclient_handler.py +344 -0
- souleyez/handlers/smbmap_handler.py +510 -0
- souleyez/handlers/smbpasswd_handler.py +296 -0
- souleyez/handlers/sqlmap_handler.py +1116 -0
- souleyez/handlers/theharvester_handler.py +601 -0
- souleyez/handlers/web_login_test_handler.py +327 -0
- souleyez/handlers/whois_handler.py +277 -0
- souleyez/handlers/wpscan_handler.py +554 -0
- souleyez/history.py +32 -16
- souleyez/importers/msf_importer.py +106 -75
- souleyez/importers/smart_importer.py +208 -147
- souleyez/integrations/siem/__init__.py +10 -10
- souleyez/integrations/siem/base.py +17 -18
- souleyez/integrations/siem/elastic.py +108 -122
- souleyez/integrations/siem/factory.py +207 -80
- souleyez/integrations/siem/googlesecops.py +146 -154
- souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
- souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
- souleyez/integrations/siem/sentinel.py +107 -109
- souleyez/integrations/siem/splunk.py +246 -212
- souleyez/integrations/siem/wazuh.py +65 -71
- souleyez/integrations/wazuh/__init__.py +5 -5
- souleyez/integrations/wazuh/client.py +70 -93
- souleyez/integrations/wazuh/config.py +85 -57
- souleyez/integrations/wazuh/host_mapper.py +28 -36
- souleyez/integrations/wazuh/sync.py +78 -68
- souleyez/intelligence/__init__.py +4 -5
- souleyez/intelligence/correlation_analyzer.py +309 -295
- souleyez/intelligence/exploit_knowledge.py +661 -623
- souleyez/intelligence/exploit_suggestions.py +159 -139
- souleyez/intelligence/gap_analyzer.py +132 -97
- souleyez/intelligence/gap_detector.py +251 -214
- souleyez/intelligence/sensitive_tables.py +266 -129
- souleyez/intelligence/service_parser.py +137 -123
- souleyez/intelligence/surface_analyzer.py +407 -268
- souleyez/intelligence/target_parser.py +159 -162
- souleyez/licensing/__init__.py +6 -6
- souleyez/licensing/validator.py +17 -19
- souleyez/log_config.py +79 -54
- souleyez/main.py +1505 -687
- souleyez/migrations/fix_job_counter.py +16 -14
- souleyez/parsers/bloodhound_parser.py +41 -39
- souleyez/parsers/crackmapexec_parser.py +178 -111
- souleyez/parsers/dalfox_parser.py +72 -77
- souleyez/parsers/dnsrecon_parser.py +103 -91
- souleyez/parsers/enum4linux_parser.py +183 -153
- souleyez/parsers/ffuf_parser.py +29 -25
- souleyez/parsers/gobuster_parser.py +301 -41
- souleyez/parsers/hashcat_parser.py +324 -79
- souleyez/parsers/http_fingerprint_parser.py +350 -103
- souleyez/parsers/hydra_parser.py +131 -111
- souleyez/parsers/impacket_parser.py +231 -178
- souleyez/parsers/john_parser.py +98 -86
- souleyez/parsers/katana_parser.py +316 -0
- souleyez/parsers/msf_parser.py +943 -498
- souleyez/parsers/nikto_parser.py +346 -65
- souleyez/parsers/nmap_parser.py +262 -174
- souleyez/parsers/nuclei_parser.py +40 -44
- souleyez/parsers/responder_parser.py +26 -26
- souleyez/parsers/searchsploit_parser.py +74 -74
- souleyez/parsers/service_explorer_parser.py +279 -0
- souleyez/parsers/smbmap_parser.py +180 -124
- souleyez/parsers/sqlmap_parser.py +434 -308
- souleyez/parsers/theharvester_parser.py +75 -57
- souleyez/parsers/whois_parser.py +135 -94
- souleyez/parsers/wpscan_parser.py +278 -190
- souleyez/plugins/afp.py +44 -36
- souleyez/plugins/afp_brute.py +114 -46
- souleyez/plugins/ard.py +48 -37
- souleyez/plugins/bloodhound.py +95 -61
- souleyez/plugins/certipy.py +303 -0
- souleyez/plugins/crackmapexec.py +186 -85
- souleyez/plugins/dalfox.py +120 -59
- souleyez/plugins/dns_hijack.py +146 -41
- souleyez/plugins/dnsrecon.py +97 -61
- souleyez/plugins/enum4linux.py +91 -66
- souleyez/plugins/evil_winrm.py +291 -0
- souleyez/plugins/ffuf.py +166 -90
- souleyez/plugins/firmware_extract.py +133 -29
- souleyez/plugins/gobuster.py +387 -190
- souleyez/plugins/gpp_extract.py +393 -0
- souleyez/plugins/hashcat.py +100 -73
- souleyez/plugins/http_fingerprint.py +913 -267
- souleyez/plugins/hydra.py +566 -200
- souleyez/plugins/impacket_getnpusers.py +117 -69
- souleyez/plugins/impacket_psexec.py +84 -64
- souleyez/plugins/impacket_secretsdump.py +103 -69
- souleyez/plugins/impacket_smbclient.py +89 -75
- souleyez/plugins/john.py +86 -69
- souleyez/plugins/katana.py +313 -0
- souleyez/plugins/kerbrute.py +237 -0
- souleyez/plugins/lfi_extract.py +541 -0
- souleyez/plugins/macos_ssh.py +117 -48
- souleyez/plugins/mdns.py +35 -30
- souleyez/plugins/msf_auxiliary.py +253 -130
- souleyez/plugins/msf_exploit.py +239 -161
- souleyez/plugins/nikto.py +134 -78
- souleyez/plugins/nmap.py +275 -91
- souleyez/plugins/nuclei.py +180 -89
- souleyez/plugins/nxc.py +285 -0
- souleyez/plugins/plugin_base.py +35 -36
- souleyez/plugins/plugin_template.py +13 -5
- souleyez/plugins/rdp_sec_check.py +130 -0
- souleyez/plugins/responder.py +112 -71
- souleyez/plugins/router_http_brute.py +76 -65
- souleyez/plugins/router_ssh_brute.py +118 -41
- souleyez/plugins/router_telnet_brute.py +124 -42
- souleyez/plugins/routersploit.py +91 -59
- souleyez/plugins/routersploit_exploit.py +77 -55
- souleyez/plugins/searchsploit.py +91 -77
- souleyez/plugins/service_explorer.py +1160 -0
- souleyez/plugins/smbmap.py +122 -72
- souleyez/plugins/smbpasswd.py +215 -0
- souleyez/plugins/sqlmap.py +301 -113
- souleyez/plugins/theharvester.py +127 -75
- souleyez/plugins/tr069.py +79 -57
- souleyez/plugins/upnp.py +65 -47
- souleyez/plugins/upnp_abuse.py +73 -55
- souleyez/plugins/vnc_access.py +129 -42
- souleyez/plugins/vnc_brute.py +109 -38
- souleyez/plugins/web_login_test.py +417 -0
- souleyez/plugins/whois.py +77 -58
- souleyez/plugins/wpscan.py +219 -69
- souleyez/reporting/__init__.py +2 -1
- souleyez/reporting/attack_chain.py +411 -346
- souleyez/reporting/charts.py +436 -501
- souleyez/reporting/compliance_mappings.py +334 -201
- souleyez/reporting/detection_report.py +126 -125
- souleyez/reporting/formatters.py +828 -591
- souleyez/reporting/generator.py +386 -302
- souleyez/reporting/metrics.py +72 -75
- souleyez/scanner.py +35 -29
- souleyez/security/__init__.py +37 -11
- souleyez/security/scope_validator.py +175 -106
- souleyez/security/validation.py +237 -149
- souleyez/security.py +22 -6
- souleyez/storage/credentials.py +247 -186
- souleyez/storage/crypto.py +296 -129
- souleyez/storage/database.py +73 -50
- souleyez/storage/db.py +58 -36
- souleyez/storage/deliverable_evidence.py +177 -128
- souleyez/storage/deliverable_exporter.py +282 -246
- souleyez/storage/deliverable_templates.py +134 -116
- souleyez/storage/deliverables.py +135 -130
- souleyez/storage/engagements.py +109 -56
- souleyez/storage/evidence.py +181 -152
- souleyez/storage/execution_log.py +31 -17
- souleyez/storage/exploit_attempts.py +93 -57
- souleyez/storage/exploits.py +67 -36
- souleyez/storage/findings.py +48 -61
- souleyez/storage/hosts.py +176 -144
- souleyez/storage/migrate_to_engagements.py +43 -19
- souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
- souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
- souleyez/storage/migrations/_003_add_execution_log.py +14 -8
- souleyez/storage/migrations/_005_screenshots.py +13 -5
- souleyez/storage/migrations/_006_deliverables.py +13 -5
- souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
- souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
- souleyez/storage/migrations/_010_evidence_linking.py +17 -10
- souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
- souleyez/storage/migrations/_012_team_collaboration.py +34 -21
- souleyez/storage/migrations/_013_add_host_tags.py +12 -6
- souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
- souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
- souleyez/storage/migrations/_016_add_domain_field.py +10 -4
- souleyez/storage/migrations/_017_msf_sessions.py +16 -8
- souleyez/storage/migrations/_018_add_osint_target.py +10 -6
- souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
- souleyez/storage/migrations/_020_add_rbac.py +36 -15
- souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
- souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
- souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
- souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
- souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
- souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
- souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
- souleyez/storage/migrations/__init__.py +26 -26
- souleyez/storage/migrations/migration_manager.py +19 -19
- souleyez/storage/msf_sessions.py +100 -65
- souleyez/storage/osint.py +17 -24
- souleyez/storage/recommendation_engine.py +269 -235
- souleyez/storage/screenshots.py +33 -32
- souleyez/storage/smb_shares.py +136 -92
- souleyez/storage/sqlmap_data.py +183 -128
- souleyez/storage/team_collaboration.py +135 -141
- souleyez/storage/timeline_tracker.py +122 -94
- souleyez/storage/wazuh_vulns.py +64 -66
- souleyez/storage/web_paths.py +33 -37
- souleyez/testing/credential_tester.py +221 -205
- souleyez/ui/__init__.py +1 -1
- souleyez/ui/ai_quotes.py +12 -12
- souleyez/ui/attack_surface.py +2439 -1516
- souleyez/ui/chain_rules_view.py +914 -382
- souleyez/ui/correlation_view.py +312 -230
- souleyez/ui/dashboard.py +2382 -1130
- souleyez/ui/deliverables_view.py +148 -62
- souleyez/ui/design_system.py +13 -13
- souleyez/ui/errors.py +49 -49
- souleyez/ui/evidence_linking_view.py +284 -179
- souleyez/ui/evidence_vault.py +393 -285
- souleyez/ui/exploit_suggestions_view.py +555 -349
- souleyez/ui/export_view.py +100 -66
- souleyez/ui/gap_analysis_view.py +315 -171
- souleyez/ui/help_system.py +105 -97
- souleyez/ui/intelligence_view.py +436 -293
- souleyez/ui/interactive.py +23034 -10679
- souleyez/ui/interactive_selector.py +75 -68
- souleyez/ui/log_formatter.py +47 -39
- souleyez/ui/menu_components.py +22 -13
- souleyez/ui/msf_auxiliary_menu.py +184 -133
- souleyez/ui/pending_chains_view.py +336 -172
- souleyez/ui/progress_indicators.py +5 -3
- souleyez/ui/recommendations_view.py +195 -137
- souleyez/ui/rule_builder.py +343 -225
- souleyez/ui/setup_wizard.py +678 -284
- souleyez/ui/shortcuts.py +217 -165
- souleyez/ui/splunk_gap_analysis_view.py +452 -270
- souleyez/ui/splunk_vulns_view.py +139 -86
- souleyez/ui/team_dashboard.py +498 -335
- souleyez/ui/template_selector.py +196 -105
- souleyez/ui/terminal.py +6 -6
- souleyez/ui/timeline_view.py +198 -127
- souleyez/ui/tool_setup.py +264 -164
- souleyez/ui/tutorial.py +202 -72
- souleyez/ui/tutorial_state.py +40 -40
- souleyez/ui/wazuh_vulns_view.py +235 -141
- souleyez/ui/wordlist_browser.py +260 -107
- souleyez/ui.py +464 -312
- souleyez/utils/tool_checker.py +427 -367
- souleyez/utils.py +33 -29
- souleyez/wordlists.py +134 -167
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
- souleyez-3.0.0.dist-info/RECORD +443 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
- souleyez-2.43.29.dist-info/RECORD +0 -379
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
souleyez/plugins/http_fingerprint.py

@@ -11,11 +11,12 @@ Detects:
 
 This runs BEFORE web vulnerability scanners to enable smarter tool configuration.
 """
+
 import json
-import time
-import ssl
 import socket
-
+import ssl
+import time
+from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
 
 from .plugin_base import PluginBase
@@ -45,238 +46,370 @@ HELP = {
     "presets": [
         {"name": "Quick Fingerprint", "args": [], "desc": "Fast fingerprint scan"},
     ],
+    "help_sections": [
+        {
+            "title": "What is HTTP Fingerprinting?",
+            "color": "cyan",
+            "content": [
+                (
+                    "Overview",
+                    [
+                        "Lightweight reconnaissance that identifies web infrastructure",
+                        "Runs automatically before vulnerability scanners",
+                        "Enables smarter tool configuration based on detected technology",
+                    ],
+                ),
+                (
+                    "What It Detects",
+                    [
+                        "Server software - Apache, nginx, IIS, LiteSpeed",
+                        "WAFs - Cloudflare, Akamai, AWS WAF, Imperva, Sucuri",
+                        "CDNs - Cloudflare, Fastly, CloudFront, Akamai",
+                        "Managed hosting - Squarespace, Wix, Shopify, Netlify",
+                    ],
+                ),
+            ],
+        },
+        {
+            "title": "Usage & Examples",
+            "color": "green",
+            "content": [
+                (
+                    "Basic Usage",
+                    [
+                        "souleyez jobs enqueue http_fingerprint http://example.com",
+                        "souleyez jobs enqueue http_fingerprint https://example.com",
+                        " → Detects server, WAF, CDN, and hosting platform",
+                    ],
+                ),
+            ],
+        },
+        {
+            "title": "Why This Matters",
+            "color": "yellow",
+            "content": [
+                (
+                    "Smart Tool Configuration",
+                    [
+                        "If Squarespace detected → skip CGI enumeration (pointless)",
+                        "If Cloudflare WAF detected → adjust scan rate to avoid blocks",
+                        "If nginx detected → test nginx-specific vulnerabilities",
+                    ],
+                ),
+                (
+                    "Attack Surface Mapping",
+                    [
+                        "Managed platforms have limited attack surface",
+                        "WAFs require evasion techniques or finding bypasses",
+                        "CDNs may hide the real origin server IP",
+                    ],
+                ),
+            ],
+        },
+    ],
 }
 
 # WAF detection signatures
 # Format: {header_name: {value_pattern: waf_name}}
 WAF_SIGNATURES = {
     # Header-based detection
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    "headers": {
+        "server": {
+            "cloudflare": "Cloudflare",
+            "akamaighost": "Akamai",
+            "akamainetworkstorage": "Akamai",
+            "awselb": "AWS ELB",
+            "bigip": "F5 BIG-IP",
+            "barracuda": "Barracuda",
+            "denyall": "DenyAll",
+            "fortigate": "Fortinet FortiGate",
+            "imperva": "Imperva",
+            "incapsula": "Imperva Incapsula",
+            "netscaler": "Citrix NetScaler",
+            "sucuri": "Sucuri",
+            "wallarm": "Wallarm",
         },
-
-
-
-
-
+        "x-powered-by": {
+            "aws lambda": "AWS Lambda",
+            "express": "Express.js",
+            "php": "PHP",
+            "asp.net": "ASP.NET",
         },
-
-
-
-
-
-
-
-
-
-
+        "x-sucuri-id": {"": "Sucuri"},
+        "x-sucuri-cache": {"": "Sucuri"},
+        "cf-ray": {"": "Cloudflare"},
+        "cf-cache-status": {"": "Cloudflare"},
+        "x-amz-cf-id": {"": "AWS CloudFront"},
+        "x-amz-cf-pop": {"": "AWS CloudFront"},
+        "x-akamai-transformed": {"": "Akamai"},
+        "x-cache": {
+            "cloudfront": "AWS CloudFront",
+            "varnish": "Varnish",
         },
-
-
-
+        "x-fastly-request-id": {"": "Fastly"},
+        "x-served-by": {
+            "cache-": "Fastly",
         },
-
-
-
+        "x-cdn": {
+            "incapsula": "Imperva Incapsula",
+            "cloudflare": "Cloudflare",
         },
-
-
-
-
-
-
-
+        "x-iinfo": {"": "Imperva Incapsula"},
+        "x-proxy-id": {"": "Imperva"},
+        "x-request-id": {},  # Generic, but useful context
+        "x-fw-protection": {"": "Unknown WAF"},
+        "x-protected-by": {"": "Unknown WAF"},
+        "x-waf-status": {"": "Unknown WAF"},
+        "x-denied-reason": {"": "Unknown WAF"},
     },
     # Cookie-based detection
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    "cookies": {
+        "__cfduid": "Cloudflare",
+        "cf_clearance": "Cloudflare",
+        "__cf_bm": "Cloudflare Bot Management",
+        "incap_ses": "Imperva Incapsula",
+        "visid_incap": "Imperva Incapsula",
+        "nlbi_": "Imperva Incapsula",
+        "ak_bmsc": "Akamai Bot Manager",
+        "bm_sz": "Akamai Bot Manager",
+        "_abck": "Akamai Bot Manager",
+        "awsalb": "AWS ALB",
+        "awsalbcors": "AWS ALB",
+        "ts": "F5 BIG-IP",
+        "bigipserver": "F5 BIG-IP",
+        "citrix_ns_id": "Citrix NetScaler",
+        "sucuri_cloudproxy": "Sucuri",
    },
 }
 
 # CDN detection signatures
 CDN_SIGNATURES = {
-
-
-
-
-
-
-
-
+    "headers": {
+        "cf-ray": "Cloudflare",
+        "cf-cache-status": "Cloudflare",
+        "x-amz-cf-id": "AWS CloudFront",
+        "x-amz-cf-pop": "AWS CloudFront",
+        "x-cache": {
+            "cloudfront": "AWS CloudFront",
+            "hit from cloudfront": "AWS CloudFront",
         },
-
-
-
-
-
-
-
-
-
-
-
-
+        "x-fastly-request-id": "Fastly",
+        "x-served-by": "Fastly",
+        "x-akamai-transformed": "Akamai",
+        "x-akamai-request-id": "Akamai",
+        "x-edge-location": "Generic CDN",
+        "x-cdn": "Generic CDN",
+        "x-cache-status": "Generic CDN",
+        "x-varnish": "Varnish",
+        "via": {
+            "cloudfront": "AWS CloudFront",
+            "varnish": "Varnish",
+            "akamai": "Akamai",
         },
-
-
-
-
-
+        "x-azure-ref": "Azure CDN",
+        "x-msedge-ref": "Azure CDN",
+        "x-goog-": "Google Cloud CDN",
+        "x-bunny-": "Bunny CDN",
+        "x-hw": "Huawei CDN",
     },
-
-
-
-
-
-
-
-
-
-
-
-
+    "server": {
+        "cloudflare": "Cloudflare",
+        "akamaighost": "Akamai",
+        "cloudfront": "AWS CloudFront",
+        "fastly": "Fastly",
+        "varnish": "Varnish",
+        "keycdn": "KeyCDN",
+        "bunnycdn": "Bunny CDN",
+        "cdn77": "CDN77",
+        "stackpath": "StackPath",
+        "limelight": "Limelight",
+        "azure": "Azure CDN",
    },
 }
 
 # Managed hosting platform signatures
 MANAGED_HOSTING_SIGNATURES = {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    "server": {
+        "squarespace": "Squarespace",
+        "wix": "Wix",
+        "shopify": "Shopify",
+        "weebly": "Weebly",
+        "webflow": "Webflow",
+        "ghost": "Ghost",
+        "medium": "Medium",
+        "tumblr": "Tumblr",
+        "blogger": "Blogger/Blogspot",
+        "wordpress.com": "WordPress.com",
+        "netlify": "Netlify",
+        "vercel": "Vercel",
+        "heroku": "Heroku",
+        "github": "GitHub Pages",
+        "gitlab": "GitLab Pages",
+        "firebase": "Firebase Hosting",
+        "render": "Render",
+        "railway": "Railway",
+        "fly": "Fly.io",
+        "deno": "Deno Deploy",
    },
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    "headers": {
+        "x-shopify-stage": "Shopify",
+        "x-shopify-request-id": "Shopify",
+        "x-wix-request-id": "Wix",
+        "x-wix-renderer-server": "Wix",
+        "x-sqsp-edge": "Squarespace",
+        "x-squarespace-": "Squarespace",
+        "x-ghost-": "Ghost",
+        "x-medium-content": "Medium",
+        "x-tumblr-": "Tumblr",
+        "x-blogger-": "Blogger/Blogspot",
+        "x-netlify-": "Netlify",
+        "x-nf-request-id": "Netlify",
+        "x-vercel-": "Vercel",
+        "x-vercel-id": "Vercel",
+        "x-heroku-": "Heroku",
+        "x-github-request-id": "GitHub Pages",
+        "x-firebase-": "Firebase Hosting",
+        "x-render-origin-server": "Render",
+        "fly-request-id": "Fly.io",
    },
-
-
-
-
-
+    "cookies": {
+        "wordpress_": "WordPress",
+        "wp-settings": "WordPress",
+        "_shopify_": "Shopify",
+        "wixSession": "Wix",
    },
 }
 
 # Server software signatures
 SERVER_SIGNATURES = {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    "apache": "Apache",
+    "nginx": "nginx",
+    "microsoft-iis": "Microsoft IIS",
+    "iis": "Microsoft IIS",
+    "lighttpd": "lighttpd",
+    "litespeed": "LiteSpeed",
+    "openresty": "OpenResty",
+    "caddy": "Caddy",
+    "tomcat": "Apache Tomcat",
+    "jetty": "Eclipse Jetty",
+    "gunicorn": "Gunicorn",
+    "uvicorn": "Uvicorn",
+    "werkzeug": "Werkzeug (Flask)",
+    "waitress": "Waitress",
+    "cowboy": "Cowboy (Erlang)",
+    "kestrel": "Kestrel (ASP.NET)",
+    "express": "Express.js",
 }
 
 
 class HttpFingerprintPlugin(PluginBase):
     name = "HTTP Fingerprint"
     tool = "http_fingerprint"
-    category = "
+    category = "scanning"
     HELP = HELP
 
-    def build_command(
+    def build_command(
+        self, target: str, args: List[str] = None, label: str = "", log_path: str = None
+    ):
         """
         HTTP fingerprinting is done in Python, not via external command.
         Return None to use run() method instead.
         """
         return None
 
-    def run(
-        ""
+    def run(
+        self, target: str, args: List[str] = None, label: str = "", log_path: str = None
+    ) -> int:
+        """Execute HTTP fingerprint scan with smart protocol detection."""
         args = args or []
         timeout = 10
 
         # Parse timeout from args
         for i, arg in enumerate(args):
-            if arg ==
+            if arg == "--timeout" and i + 1 < len(args):
                 try:
                     timeout = int(args[i + 1])
                 except ValueError:
                     pass
 
         # Ensure target has scheme
-        if not target.startswith((
-            target = f
+        if not target.startswith(("http://", "https://")):
+            target = f"http://{target}"
 
         try:
-
-
+            # Use thread-based hard timeout to prevent indefinite hangs
+            # urllib timeouts don't always work if server accepts connection but stalls
+            from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
+
+            hard_timeout = timeout * 3  # 30 seconds max for entire probe operation
+
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(self._smart_probe, target, timeout)
+                try:
+                    result, effective_url = future.result(timeout=hard_timeout)
+                except FuturesTimeout:
+                    # Hard timeout hit - server is unresponsive
+                    result = {
+                        "error": f"Timeout: server did not respond within {hard_timeout}s",
+                        "status_code": None,
+                        "server": None,
+                        "waf": [],
+                        "cdn": [],
+                        "managed_hosting": None,
+                        "technologies": [],
+                        "headers": {},
+                        "cookies": [],
+                        "tls": None,
+                        "redirect_url": None,
+                    }
+                    effective_url = target
+
+            output = self._format_output(effective_url, result, label)
 
             if log_path:
-                with open(log_path,
+                with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
                     fh.write(output)
+
+                    # Skip additional probing if initial fingerprint failed
+                    if not result.get("error"):
+                        # Fetch robots.txt and sitemap.xml for path discovery
+                        robots_paths, sitemap_paths = self._fetch_robots_sitemap(
+                            effective_url, timeout
+                        )
+                        result["robots_paths"] = robots_paths
+                        result["sitemap_paths"] = sitemap_paths
+
+                        # Quick path probing for CMS, admin panels, API endpoints
+                        quick_probe = self._quick_path_probe(effective_url, timeout)
+                        result["cms_detected"] = quick_probe.get("cms")
+                        result["admin_panels"] = quick_probe.get("admin_panels", [])
+                        result["api_endpoints"] = quick_probe.get("api_endpoints", [])
+
+                        # Write additional detections to log
+                        if quick_probe.get("cms"):
+                            cms = quick_probe["cms"]
+                            fh.write(f"\n{'=' * 40}\n")
+                            fh.write(
+                                f"CMS DETECTED: {cms['name']} ({cms['confidence']} confidence)\n"
+                            )
+                            for p in cms["paths"]:
+                                fh.write(f" - {p['path']} (HTTP {p['status']})\n")
+                            fh.write(f"{'=' * 40}\n")
+
+                        if quick_probe.get("admin_panels"):
+                            fh.write(f"\nADMIN PANELS FOUND:\n")
+                            for panel in quick_probe["admin_panels"]:
+                                fh.write(
+                                    f" - {panel['name']}: {panel['url']} (HTTP {panel['status']})\n"
+                                )
+
+                        if quick_probe.get("api_endpoints"):
+                            fh.write(f"\nAPI ENDPOINTS FOUND:\n")
+                            for api in quick_probe["api_endpoints"]:
+                                fh.write(
+                                    f" - {api['type']}: {api['url']} (HTTP {api['status']})\n"
+                                )
+
                     # Write JSON result for parsing
                     fh.write("\n\n=== JSON_RESULT ===\n")
                     fh.write(json.dumps(result, indent=2))
@@ -290,11 +423,181 @@ class HttpFingerprintPlugin(PluginBase):
             error_output += f"Error: {type(e).__name__}: {e}\n"
 
             if log_path:
-                with open(log_path,
+                with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
                     fh.write(error_output)
 
             return 1
 
+    def _smart_probe(self, target: str, timeout: int = 10) -> tuple:
+        """
+        Smart protocol detection: probe both HTTP and HTTPS, return the better result.
+
+        This handles cases where:
+        - nmap reports HTTP but server is actually HTTPS
+        - Server serves different content on HTTP vs HTTPS
+        - HTTP redirects to HTTPS (or vice versa)
+
+        Returns:
+            tuple: (result_dict, effective_url)
+        """
+        parsed = urlparse(target)
+
+        # Quick connectivity check - fail fast if port isn't responding
+        host = parsed.hostname
+        port = parsed.port or (443 if parsed.scheme == "https" else 80)
+        try:
+            with socket.create_connection((host, port), timeout=min(timeout, 5)) as sock:
+                pass  # Just checking if we can connect
+        except (socket.timeout, socket.error, OSError) as e:
+            # Port not responding - return error result immediately
+            return {
+                "error": f"Connection failed: {e}",
+                "status_code": None,
+                "server": None,
+                "waf": [],
+                "cdn": [],
+                "managed_hosting": None,
+                "technologies": [],
+                "headers": {},
+                "cookies": [],
+                "tls": None,
+                "redirect_url": None,
+                "protocol_detection": "failed",
+                "effective_url": target,
+            }, target
+
+        # Build both URL variants
+        http_url = (
+            f"http://{host}:{port}"
+            if port not in (80, 443)
+            else f"http://{host}" if port == 80 else f"http://{host}:{port}"
+        )
+        https_url = (
+            f"https://{host}:{port}"
+            if port not in (80, 443)
+            else f"https://{host}" if port == 443 else f"https://{host}:{port}"
+        )
+
+        # Handle standard ports correctly
+        if port == 80:
+            http_url = f"http://{host}"
+            https_url = f"https://{host}:80"  # Non-standard HTTPS on port 80
+        elif port == 443:
+            http_url = f"http://{host}:443"  # Non-standard HTTP on port 443
+            https_url = f"https://{host}"
+        else:
+            http_url = f"http://{host}:{port}"
+            https_url = f"https://{host}:{port}"
+
+        # Probe the original protocol first
+        original_is_https = parsed.scheme == "https"
+        primary_url = target
+        alternate_url = https_url if not original_is_https else http_url
+
+        # Probe primary (original) URL
+        primary_result = self._fingerprint(primary_url, timeout)
+
+        # Calculate "richness" score for primary result
+        primary_score = self._calculate_result_richness(primary_result)
+        primary_status = primary_result.get("status_code") or 0
+
+        # Check if primary result is "good enough" to skip alternate probe
+        # Must have: successful status (2xx/3xx), decent score, no errors
+        # 4xx/5xx status means we MUST try alternate protocol (could be wrong protocol)
+        primary_is_successful = 200 <= primary_status < 400
+
+        if (
+            primary_is_successful
+            and primary_score >= 3
+            and not primary_result.get("error")
+        ):
+            primary_result["protocol_detection"] = "primary"
+            primary_result["effective_url"] = primary_url
+            return primary_result, primary_url
+
+        # Otherwise, probe alternate protocol (primary failed, errored, or got 4xx/5xx)
+        alternate_result = self._fingerprint(alternate_url, timeout)
+        alternate_score = self._calculate_result_richness(alternate_result)
+
+        # Compare and choose the better result
+        if alternate_score > primary_score and not alternate_result.get("error"):
+            # Alternate protocol is better
+            alternate_result["protocol_detection"] = "upgraded"
+            alternate_result["protocol_note"] = (
+                f"Switched from {parsed.scheme.upper()} to {'HTTPS' if not original_is_https else 'HTTP'} (richer response)"
+            )
+            alternate_result["original_url"] = primary_url
+            alternate_result["effective_url"] = alternate_url
+            return alternate_result, alternate_url
+        elif not primary_result.get("error"):
+            # Primary is fine or equal
+            primary_result["protocol_detection"] = "primary"
+            primary_result["effective_url"] = primary_url
+            return primary_result, primary_url
+        elif not alternate_result.get("error"):
+            # Primary failed, alternate works
+            alternate_result["protocol_detection"] = "fallback"
+            alternate_result["protocol_note"] = (
+                f"Primary ({parsed.scheme.upper()}) failed, using {'HTTPS' if not original_is_https else 'HTTP'}"
+            )
+            alternate_result["original_url"] = primary_url
+            alternate_result["effective_url"] = alternate_url
+            return alternate_result, alternate_url
+        else:
+            # Both failed, return primary with error
+            primary_result["protocol_detection"] = "failed"
+            primary_result["effective_url"] = primary_url
+            return primary_result, primary_url
+
+    def _calculate_result_richness(self, result: Dict[str, Any]) -> int:
+        """
+        Calculate a "richness" score for fingerprint results.
+        Higher score = more useful/valid response.
+        """
+        score = 0
+
+        # Error = bad
+        if result.get("error"):
+            return 0
+
+        # Status code scoring
+        status = result.get("status_code")
+        if status == 200:
+            score += 3
+        elif status in (301, 302, 303, 307, 308):
+            score += 2  # Redirects are informative
+        elif status in (401, 403):
+            score += 2  # Auth required = real service
+        elif status in (404, 500, 502, 503):
+            score += 1  # At least it responded
+
+        # Has server header
+        if result.get("server"):
+            score += 1
+
+        # Has technologies detected
+        if result.get("technologies"):
+            score += len(result["technologies"])
+
+        # Has TLS info (means HTTPS worked)
+        if result.get("tls"):
+            score += 2
+
+        # Has WAF/CDN detection
+        if result.get("waf"):
+            score += 1
+        if result.get("cdn"):
+            score += 1
+
+        # Has headers (more headers = richer response)
+        headers = result.get("headers", {})
+        if len(headers) > 5:
+            score += 2
+        elif len(headers) > 0:
+            score += 1
+
+        return score
+
     def _fingerprint(self, url: str, timeout: int = 10) -> Dict[str, Any]:
         """
         Perform HTTP fingerprinting on target URL.
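Aside (editorial, not part of the package diff): the protocol choice in the hunk above comes down to scoring each probe result and keeping the richer one. A trimmed standalone sketch of that idea follows; the weights and dict literals here are simplified illustrations, not the plugin's full scoring table or API.

from typing import Any, Dict

def richness(result: Dict[str, Any]) -> int:
    # Condensed version of the scoring idea: errors score 0,
    # informative status codes and extra metadata add points.
    if result.get("error"):
        return 0
    score = 0
    status = result.get("status_code")
    if status == 200:
        score += 3
    elif status in (301, 302, 401, 403):
        score += 2
    if result.get("server"):
        score += 1
    if result.get("tls"):
        score += 2
    score += len(result.get("technologies", []))
    return score

# Example: an HTTPS probe that yielded TLS details beats a bare HTTP redirect.
http_probe = {"status_code": 301, "server": "nginx"}
https_probe = {"status_code": 200, "server": "nginx", "tls": {"version": "TLSv1.3"}}
best = max((http_probe, https_probe), key=richness)
print(best["status_code"])  # 200 -> the HTTPS result is kept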
@@ -308,46 +611,54 @@ class HttpFingerprintPlugin(PluginBase):
             - technologies: List of detected technologies
             - tls: TLS/SSL information (for HTTPS)
         """
-        import urllib.request
         import urllib.error
+        import urllib.request
+
+        # Set global socket timeout to prevent hanging on slow/unresponsive servers
+        # This is a safety net - individual requests also have timeouts
+        old_timeout = socket.getdefaulttimeout()
+        socket.setdefaulttimeout(timeout + 5)  # Slightly longer than request timeout
 
         result = {
-
-
-
-
-
-
-
-
-
-
-
+            "server": None,
+            "server_version": None,
+            "waf": [],
+            "cdn": [],
+            "managed_hosting": None,
+            "technologies": [],
+            "headers": {},
+            "cookies": [],
+            "tls": None,
+            "status_code": None,
+            "redirect_url": None,
         }
 
         parsed = urlparse(url)
 
         # Security: Only allow http/https schemes (B310 - prevent file:// or custom schemes)
-        if parsed.scheme not in (
-            result[
+        if parsed.scheme not in ("http", "https"):
+            result["error"] = (
+                f"Invalid URL scheme: {parsed.scheme}. Only http/https allowed."
+            )
             return result
 
-        is_https = parsed.scheme ==
+        is_https = parsed.scheme == "https"
 
         # Check if target is an IP address (for special handling)
         import re
-
+
+        is_ip_target = bool(re.match(r"^(\d{1,3}\.){3}\d{1,3}$", parsed.hostname or ""))
 
         # Create request with common browser headers
         req = urllib.request.Request(
             url,
             headers={
-
-
-
-
-
-            }
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+                "Accept-Encoding": "identity",
+                "Connection": "close",
+            },
         )
 
         # Always create SSL context with verification disabled
@@ -360,79 +671,93 @@ class HttpFingerprintPlugin(PluginBase):
             # Get TLS info for HTTPS targets
             if is_https:
                 try:
-                    with socket.create_connection(
-
+                    with socket.create_connection(
+                        (parsed.hostname, parsed.port or 443), timeout=timeout
+                    ) as sock:
+                        with ctx.wrap_socket(
+                            sock, server_hostname=parsed.hostname
+                        ) as ssock:
                             cert = ssock.getpeercert(binary_form=True)
                             cipher = ssock.cipher()
                             version = ssock.version()
-                            result[
-
-
-
+                            result["tls"] = {
+                                "version": version,
+                                "cipher": cipher[0] if cipher else None,
+                                "bits": cipher[2] if cipher else None,
                             }
                 except Exception:
                     pass  # TLS info is optional
 
            # Always pass SSL context (handles HTTP->HTTPS redirects)
-            response = urllib.request.urlopen(
+            response = urllib.request.urlopen(
+                req, timeout=timeout, context=ctx
+            )  # nosec B310 - scheme validated above
 
-            result[
+            result["status_code"] = response.getcode()
 
            # Get headers
            headers = {k.lower(): v for k, v in response.headers.items()}
-            result[
+            result["headers"] = dict(response.headers)
 
            # Check for redirect
            if response.geturl() != url:
-                result[
+                result["redirect_url"] = response.geturl()
 
            # Parse cookies
-            if
-                cookies = headers.get(
-                result[
+            if "set-cookie" in headers:
+                cookies = headers.get("set-cookie", "")
+                result["cookies"] = [c.strip() for c in cookies.split(",")]
 
            # Detect server
-            server_header = headers.get(
-            result[
+            server_header = headers.get("server", "").lower()
+            result["server"] = headers.get("server")
 
            for sig, name in SERVER_SIGNATURES.items():
                if sig in server_header:
-                    result[
-                    result[
+                    result["server_version"] = name
+                    result["technologies"].append(name)
                    break
 
            # Detect WAF
-            result[
+            result["waf"] = self._detect_waf(headers, result["cookies"])
 
            # Detect CDN
-            result[
+            result["cdn"] = self._detect_cdn(headers, server_header)
 
            # Detect managed hosting
-            result[
+            result["managed_hosting"] = self._detect_managed_hosting(
+                headers, server_header, result["cookies"]
+            )
 
            # Detect technologies from headers
            self._detect_technologies(headers, result)
 
        except urllib.error.HTTPError as e:
            # Even errors give us useful headers
-            result[
+            result["status_code"] = e.code
            headers = {k.lower(): v for k, v in e.headers.items()}
-            result[
-            result[
+            result["headers"] = dict(e.headers)
+            result["server"] = headers.get("server")
 
-            server_header = headers.get(
-            result[
-            result[
-            result[
+            server_header = headers.get("server", "").lower()
+            result["waf"] = self._detect_waf(headers, [])
+            result["cdn"] = self._detect_cdn(headers, server_header)
+            result["managed_hosting"] = self._detect_managed_hosting(
+                headers, server_header, []
+            )
 
        except urllib.error.URLError as e:
-            result[
+            result["error"] = str(e.reason)
 
        except socket.timeout:
-            result[
+            result["error"] = "Connection timed out"
 
        except Exception as e:
-            result[
+            result["error"] = f"{type(e).__name__}: {e}"
+
+        finally:
+            # Restore original socket timeout
+            socket.setdefaulttimeout(old_timeout)
 
        return result
 
@@ -441,20 +766,20 @@ class HttpFingerprintPlugin(PluginBase):
         detected = []
 
         # Check headers
-        for header, signatures in WAF_SIGNATURES[
-            header_val = headers.get(header,
+        for header, signatures in WAF_SIGNATURES["headers"].items():
+            header_val = headers.get(header, "").lower()
             if header_val:
                 if isinstance(signatures, dict):
                     for sig, waf_name in signatures.items():
-                        if sig ==
+                        if sig == "" or sig in header_val:
                             if waf_name and waf_name not in detected:
                                 detected.append(waf_name)
                 elif isinstance(signatures, str) and signatures not in detected:
                     detected.append(signatures)
 
         # Check cookies
-        cookie_str =
-        for cookie_sig, waf_name in WAF_SIGNATURES[
+        cookie_str = " ".join(cookies).lower()
+        for cookie_sig, waf_name in WAF_SIGNATURES["cookies"].items():
             if cookie_sig.lower() in cookie_str:
                 if waf_name not in detected:
                     detected.append(waf_name)
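Aside (editorial, not part of the package diff): a condensed standalone sketch of the signature lookup this hunk performs, using an abbreviated signature table rather than the full WAF_SIGNATURES above; the table contents and example header values are illustrative only.

from typing import Dict, List

# Trimmed illustration of the signature tables shown in the diff (not the full set).
WAF_HEADER_SIGS = {
    "server": {"cloudflare": "Cloudflare", "bigip": "F5 BIG-IP"},
    "cf-ray": {"": "Cloudflare"},  # empty pattern: header presence alone matches
    "x-sucuri-id": {"": "Sucuri"},
}
WAF_COOKIE_SIGS = {"incap_ses": "Imperva Incapsula", "__cf_bm": "Cloudflare Bot Management"}

def detect_waf(headers: Dict[str, str], cookies: List[str]) -> List[str]:
    detected = []
    lowered = {k.lower(): v.lower() for k, v in headers.items()}
    for header, patterns in WAF_HEADER_SIGS.items():
        value = lowered.get(header, "")
        if not value:
            continue
        for pattern, name in patterns.items():
            if (pattern == "" or pattern in value) and name not in detected:
                detected.append(name)
    cookie_blob = " ".join(cookies).lower()
    for pattern, name in WAF_COOKIE_SIGS.items():
        if pattern.lower() in cookie_blob and name not in detected:
            detected.append(name)
    return detected

print(detect_waf({"Server": "cloudflare", "CF-RAY": "8a1b2c3d"}, ["__cf_bm=abc123"]))
# ['Cloudflare', 'Cloudflare Bot Management']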
@@ -466,8 +791,8 @@ class HttpFingerprintPlugin(PluginBase):
         detected = []
 
         # Check specific headers
-        for header, cdn_info in CDN_SIGNATURES[
-            header_val = headers.get(header,
+        for header, cdn_info in CDN_SIGNATURES["headers"].items():
+            header_val = headers.get(header, "").lower()
             if header_val:
                 if isinstance(cdn_info, dict):
                     for sig, cdn_name in cdn_info.items():
@@ -477,28 +802,30 @@ class HttpFingerprintPlugin(PluginBase):
                     detected.append(cdn_info)
 
         # Check server header
-        for sig, cdn_name in CDN_SIGNATURES[
+        for sig, cdn_name in CDN_SIGNATURES["server"].items():
             if sig in server_header and cdn_name not in detected:
                 detected.append(cdn_name)
 
         return detected
 
-    def _detect_managed_hosting(
+    def _detect_managed_hosting(
+        self, headers: Dict[str, str], server_header: str, cookies: List[str]
+    ) -> Optional[str]:
         """Detect managed hosting platform."""
         # Check server header first (most reliable)
-        for sig, platform in MANAGED_HOSTING_SIGNATURES[
+        for sig, platform in MANAGED_HOSTING_SIGNATURES["server"].items():
             if sig in server_header:
                 return platform
 
         # Check specific headers
-        for header_prefix, platform in MANAGED_HOSTING_SIGNATURES[
+        for header_prefix, platform in MANAGED_HOSTING_SIGNATURES["headers"].items():
             for header in headers:
                 if header.lower().startswith(header_prefix.lower()):
                     return platform
 
         # Check cookies
-        cookie_str =
-        for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES[
+        cookie_str = " ".join(cookies).lower()
+        for cookie_sig, platform in MANAGED_HOSTING_SIGNATURES["cookies"].items():
             if cookie_sig.lower() in cookie_str:
                 return platform
 
@@ -506,31 +833,309 @@ class HttpFingerprintPlugin(PluginBase):
|
|
|
506
833
|
|
|
507
834
|
def _detect_technologies(self, headers: Dict[str, str], result: Dict[str, Any]):
|
|
508
835
|
"""Detect additional technologies from headers."""
|
|
509
|
-
techs = result[
|
|
836
|
+
techs = result["technologies"]
|
|
510
837
|
|
|
511
838
|
# X-Powered-By
|
|
512
|
-
powered_by = headers.get(
|
|
839
|
+
powered_by = headers.get("x-powered-by", "")
|
|
513
840
|
if powered_by:
|
|
514
|
-
if
|
|
515
|
-
techs.append(f
|
|
516
|
-
elif
|
|
517
|
-
techs.append(f
|
|
518
|
-
elif
|
|
519
|
-
techs.append(
|
|
841
|
+
if "php" in powered_by.lower():
|
|
842
|
+
techs.append(f"PHP ({powered_by})")
|
|
843
|
+
elif "asp.net" in powered_by.lower():
|
|
844
|
+
techs.append(f"ASP.NET ({powered_by})")
|
|
845
|
+
elif "express" in powered_by.lower():
|
|
846
|
+
techs.append("Express.js")
|
|
520
847
|
elif powered_by not in techs:
|
|
521
848
|
techs.append(powered_by)
|
|
522
849
|
|
|
523
850
|
# X-AspNet-Version
|
|
524
|
-
aspnet_ver = headers.get(
|
|
851
|
+
aspnet_ver = headers.get("x-aspnet-version", "")
|
|
525
852
|
if aspnet_ver:
|
|
526
|
-
techs.append(f
|
|
853
|
+
techs.append(f"ASP.NET {aspnet_ver}")
|
|
527
854
|
|
|
528
855
|
# X-Generator
|
|
529
|
-
generator = headers.get(
|
|
856
|
+
generator = headers.get("x-generator", "")
|
|
530
857
|
if generator:
|
|
531
858
|
techs.append(generator)
|
|
532
859
|
|
|
533
|
-
result[
|
|
860
|
+
result["technologies"] = list(set(techs))
|
|
861
|
+
|
|
862
|
+
def _fetch_robots_sitemap(self, base_url: str, timeout: int = 10) -> tuple:
|
|
863
|
+
"""
|
|
864
|
+
Fetch robots.txt and sitemap.xml to extract paths for discovery.
|
|
865
|
+
|
|
866
|
+
This runs early in the recon chain so discovered paths can trigger
|
|
867
|
+
follow-up scans even if gobuster's wordlist doesn't include them.
|
|
868
|
+
|
|
869
|
+
Returns:
|
|
870
|
+
tuple: (robots_paths, sitemap_paths) - lists of discovered URLs
|
|
871
|
+
"""
|
|
872
|
+
import re
|
|
873
|
+
import urllib.error
|
|
874
|
+
import urllib.request
|
|
875
|
+
from urllib.parse import urljoin
|
|
876
|
+
|
|
877
|
+
try:
|
|
878
|
+
import defusedxml.ElementTree as ElementTree
|
|
879
|
+
except ImportError:
|
|
880
|
+
import xml.etree.ElementTree as ElementTree
|
|
881
|
+
+        parsed = urlparse(base_url)
+        base = f"{parsed.scheme}://{parsed.netloc}"
+
+        robots_paths = []
+        sitemap_paths = []
+
+        # Create SSL context for self-signed certs
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+
+        # === Fetch robots.txt ===
+        try:
+            robots_url = urljoin(base + "/", "robots.txt")
+            req = urllib.request.Request(
+                robots_url,
+                headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
+            )
+            with urllib.request.urlopen(req, timeout=timeout, context=ctx) as response:
+                if response.getcode() == 200:
+                    content = response.read().decode("utf-8", errors="replace")
+
+                    # Known directives to skip
+                    known_directives = [
+                        "user-agent:",
+                        "disallow:",
+                        "allow:",
+                        "sitemap:",
+                        "crawl-delay:",
+                        "host:",
+                        "request-rate:",
+                    ]
+
+                    for line in content.split("\n"):
+                        line = line.strip()
+                        if not line or line.startswith("#"):
+                            continue
+
+                        line_lower = line.lower()
+
+                        # Extract Disallow/Allow paths
+                        if line_lower.startswith("disallow:") or line_lower.startswith(
+                            "allow:"
+                        ):
+                            _, _, path = line.partition(":")
+                            path = path.strip()
+                            if (
+                                path
+                                and path != "/"
+                                and "*" not in path
+                                and "?" not in path
+                            ):
+                                full_url = urljoin(base + "/", path.lstrip("/"))
+                                if full_url not in robots_paths:
+                                    robots_paths.append(full_url)
+
+                        # Extract Sitemap URLs
+                        elif line_lower.startswith("sitemap:"):
+                            _, _, sitemap_url = line.partition(":")
+                            sitemap_url = sitemap_url.strip()
+                            # Handle "Sitemap: http://..." format
+                            if sitemap_url.startswith("//"):
+                                sitemap_url = parsed.scheme + ":" + sitemap_url
+                            elif not sitemap_url.startswith("http"):
+                                sitemap_url = urljoin(
+                                    base + "/", sitemap_url.lstrip("/")
+                                )
+                            if sitemap_url not in sitemap_paths:
+                                sitemap_paths.append(sitemap_url)
+
+                        # Extract bare file paths (CTF-style hints like "key-1-of-3.txt")
+                        elif not any(
+                            line_lower.startswith(d) for d in known_directives
+                        ):
+                            path = line.strip()
+                            # Must look like a file with extension
+                            if path and re.match(r"^[\w\-./]+\.\w{1,5}$", path):
+                                full_url = urljoin(base + "/", path.lstrip("/"))
+                                if full_url not in robots_paths:
+                                    robots_paths.append(full_url)
+
+        except Exception:
+            pass  # robots.txt fetch is optional
+
+        # === Fetch sitemap.xml (if not found in robots.txt) ===
+        if not sitemap_paths:
+            sitemap_paths.append(urljoin(base + "/", "sitemap.xml"))
+
+        # Try to parse each sitemap
+        all_sitemap_urls = []
+        for sitemap_url in sitemap_paths[:3]:  # Limit to first 3 sitemaps
+            try:
+                req = urllib.request.Request(
+                    sitemap_url,
+                    headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
+                )
+                with urllib.request.urlopen(
+                    req, timeout=timeout, context=ctx
+                ) as response:
+                    if response.getcode() == 200:
+                        content = response.read().decode("utf-8", errors="replace")
+                        try:
+                            root = ElementTree.fromstring(content)
+                            ns = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}
+
+                            # Try with namespace
+                            for loc in root.findall(".//sm:loc", ns):
+                                if loc.text and loc.text not in all_sitemap_urls:
+                                    all_sitemap_urls.append(loc.text.strip())
+
+                            # Try without namespace
+                            if not all_sitemap_urls:
+                                for loc in root.findall(".//loc"):
+                                    if loc.text and loc.text not in all_sitemap_urls:
+                                        all_sitemap_urls.append(loc.text.strip())
+
+                        except ElementTree.ParseError:
+                            # Fallback to regex
+                            loc_matches = re.findall(r"<loc>([^<]+)</loc>", content)
+                            for url in loc_matches:
+                                if url not in all_sitemap_urls:
+                                    all_sitemap_urls.append(url)
+
+            except Exception:
+                pass  # sitemap fetch is optional
+
+        # Replace sitemap_paths with actual URLs from sitemaps (limit to 50)
+        if all_sitemap_urls:
+            sitemap_paths = all_sitemap_urls[:50]
+        else:
+            sitemap_paths = []  # Clear if sitemap didn't exist
+
+        return robots_paths, sitemap_paths
+
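Taken together, the added block harvests robots.txt and sitemap.xml for extra attack-surface URLs: Disallow/Allow targets and bare filename hints are resolved against the target origin, Sitemap references are followed (at most three), and two URL lists are returned. The standalone sketch below mirrors only the robots.txt line-classification step; the function name and sample input are illustrative and not taken from the package:

import re
from urllib.parse import urljoin

def classify_robots_lines(content: str, base: str) -> list:
    # Illustrative re-implementation of the classification loop above
    # (sitemap resolution and de-duplication are omitted for brevity).
    discovered = []
    directives = ("user-agent:", "disallow:", "allow:", "sitemap:",
                  "crawl-delay:", "host:", "request-rate:")
    for line in content.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # comments and blank lines carry no paths
        lower = line.lower()
        if lower.startswith(("disallow:", "allow:")):
            _, _, path = line.partition(":")
            path = path.strip()
            if path and path != "/" and "*" not in path and "?" not in path:
                discovered.append(urljoin(base + "/", path.lstrip("/")))
        elif not lower.startswith(directives):
            # Bare "key-1-of-3.txt"-style hints: keep only filename-shaped lines
            if re.match(r"^[\w\-./]+\.\w{1,5}$", line):
                discovered.append(urljoin(base + "/", line.lstrip("/")))
    return discovered

# Example with a made-up, CTF-style robots.txt:
print(classify_robots_lines("Disallow: /admin/\nkey-1-of-3.txt", "http://10.0.0.5"))
# -> ['http://10.0.0.5/admin/', 'http://10.0.0.5/key-1-of-3.txt']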
+    def _quick_path_probe(self, base_url: str, timeout: int = 10) -> Dict[str, Any]:
+        """
+        Quick path probing for CMS detection, admin panels, and API indicators.
+
+        Uses HEAD requests to minimize bandwidth and noise. Only checks paths
+        that return 2xx/3xx/401/403 status codes (indicates existence).
+
+        Returns:
+            dict: {
+                'cms': {'name': str, 'paths': list} or None,
+                'admin_panels': [{'path': str, 'status': int}],
+                'api_endpoints': [{'path': str, 'status': int, 'type': str}]
+            }
+        """
+        import urllib.error
+        import urllib.request
+
+        parsed = urlparse(base_url)
+        base = f"{parsed.scheme}://{parsed.netloc}"
+
+        # Create SSL context for self-signed certs
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+
+        result = {"cms": None, "admin_panels": [], "api_endpoints": []}
+
+        # Define paths to check
+        # Format: (path, category, subcategory/type)
+        paths_to_check = [
+            # CMS Detection
+            ("/wp-admin/", "cms", "WordPress"),
+            ("/wp-login.php", "cms", "WordPress"),
+            ("/wp-includes/", "cms", "WordPress"),
+            ("/administrator/", "cms", "Joomla"),
+            ("/components/com_content/", "cms", "Joomla"),
+            ("/user/login", "cms", "Drupal"),
+            ("/core/misc/drupal.js", "cms", "Drupal"),
+            ("/typo3/", "cms", "TYPO3"),
+            ("/sitecore/", "cms", "Sitecore"),
+            # Admin Panels
+            ("/phpmyadmin/", "admin", "phpMyAdmin"),
+            ("/pma/", "admin", "phpMyAdmin"),
+            ("/admin/", "admin", "Admin Panel"),
+            ("/admin/login", "admin", "Admin Login"),
+            ("/login/", "admin", "Login Page"),
+            ("/login.php", "admin", "Login Page"),
+            ("/manager/", "admin", "Manager"),
+            ("/cpanel/", "admin", "cPanel"),
+            ("/webmail/", "admin", "Webmail"),
+            # API Indicators
+            ("/api/", "api", "REST API"),
+            ("/api/v1/", "api", "REST API v1"),
+            ("/api/v2/", "api", "REST API v2"),
+            ("/graphql", "api", "GraphQL"),
+            ("/graphql/", "api", "GraphQL"),
+            ("/swagger.json", "api", "Swagger/OpenAPI"),
+            ("/swagger/", "api", "Swagger UI"),
+            ("/openapi.json", "api", "OpenAPI"),
+            ("/api-docs/", "api", "API Docs"),
+            ("/v1/", "api", "API v1"),
+            ("/rest/", "api", "REST API"),
+        ]
+
+        # Track CMS detections to avoid duplicates
+        cms_detected = {}
+
+        for path, category, subtype in paths_to_check:
+            try:
+                url = base.rstrip("/") + path
+                req = urllib.request.Request(
+                    url,
+                    method="HEAD",
+                    headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
+                )
+
+                try:
+                    with urllib.request.urlopen(
+                        req, timeout=timeout, context=ctx
+                    ) as response:
+                        status = response.getcode()
+                except urllib.error.HTTPError as e:
+                    status = e.code
+
+                # Consider 2xx, 3xx, 401, 403 as "exists"
+                if status in (200, 201, 204, 301, 302, 303, 307, 308, 401, 403):
+                    if category == "cms":
+                        if subtype not in cms_detected:
+                            cms_detected[subtype] = []
+                        cms_detected[subtype].append({"path": path, "status": status})
+                    elif category == "admin":
+                        result["admin_panels"].append(
+                            {
+                                "path": path,
+                                "name": subtype,
+                                "status": status,
+                                "url": url,
+                            }
+                        )
+                    elif category == "api":
+                        result["api_endpoints"].append(
+                            {
+                                "path": path,
+                                "type": subtype,
+                                "status": status,
+                                "url": url,
+                            }
+                        )
+
+            except Exception:
+                # Timeout or connection error - skip this path
+                continue
+
+        # Determine primary CMS (most path matches)
+        if cms_detected:
+            best_cms = max(cms_detected.items(), key=lambda x: len(x[1]))
+            result["cms"] = {
+                "name": best_cms[0],
+                "paths": best_cms[1],
+                "confidence": "high" if len(best_cms[1]) >= 2 else "medium",
+            }
+
+        return result
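_quick_path_probe above sends one HEAD request per candidate path and treats 2xx/3xx/401/403 responses as evidence that the path exists, bucketing hits into CMS, admin-panel, and API findings. A sketch of how a caller might consume the returned dictionary; the call is illustrative only (it leans on the module-level plugin instance defined at the end of this file, and driving a private method directly is not how the plugin is normally invoked):

# Illustrative use of the result shape documented in the docstring above.
probe = plugin._quick_path_probe("https://10.0.0.5", timeout=5)

if probe["cms"]:
    cms = probe["cms"]
    print(f"CMS: {cms['name']} (confidence: {cms['confidence']})")

for panel in probe["admin_panels"]:
    print(f"Admin panel: {panel['url']} -> HTTP {panel['status']}")

for api in probe["api_endpoints"]:
    print(f"API surface: {api['type']} at {api['url']} (HTTP {api['status']})")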
 
     def _format_output(self, target: str, result: Dict[str, Any], label: str) -> str:
         """Format fingerprint results for log output."""
@@ -539,33 +1144,50 @@ class HttpFingerprintPlugin(PluginBase):
         lines.append(f"Target: {target}")
         if label:
             lines.append(f"Label: {label}")
-        lines.append(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}")
+        lines.append(
+            f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}"
+        )
         lines.append("=" * 60)
         lines.append("")
 
-        if result.get('error'):
+        if result.get("error"):
             lines.append(f"ERROR: {result['error']}")
-            return
+            return "\n".join(lines)
+
+        # Protocol detection info (smart probe results)
+        protocol_detection = result.get("protocol_detection")
+        if protocol_detection in ("upgraded", "fallback"):
+            lines.append("-" * 40)
+            lines.append(f"PROTOCOL DETECTION: {protocol_detection.upper()}")
+            if result.get("protocol_note"):
+                lines.append(f" {result['protocol_note']}")
+            if result.get("original_url"):
+                lines.append(f" Original URL: {result['original_url']}")
+            lines.append(f" Effective URL: {result.get('effective_url', target)}")
+            lines.append("-" * 40)
+            lines.append("")
 
         # Status
         lines.append(f"HTTP Status: {result.get('status_code', 'N/A')}")
 
-        if result.get('redirect_url'):
+        if result.get("redirect_url"):
            lines.append(f"Redirected to: {result['redirect_url']}")
 
         # Server
-        if result.get('server'):
+        if result.get("server"):
            lines.append(f"Server: {result['server']}")
 
         # TLS
-        if result.get('tls'):
-            tls = result['tls']
-            lines.append(f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})")
+        if result.get("tls"):
+            tls = result["tls"]
+            lines.append(
+                f"TLS: {tls.get('version', 'Unknown')} ({tls.get('cipher', 'Unknown')})"
+            )
 
         lines.append("")
 
         # Managed Hosting (most important for tool decisions)
-        if result.get('managed_hosting'):
+        if result.get("managed_hosting"):
            lines.append("-" * 40)
            lines.append(f"MANAGED HOSTING DETECTED: {result['managed_hosting']}")
            lines.append(" -> CGI enumeration will be skipped")
@@ -574,29 +1196,53 @@ class HttpFingerprintPlugin(PluginBase):
            lines.append("")
 
         # WAF
-        if result.get('waf'):
+        if result.get("waf"):
            lines.append(f"WAF/Protection Detected:")
-            for waf in result['waf']:
+            for waf in result["waf"]:
                lines.append(f" - {waf}")
            lines.append("")
 
         # CDN
-        if result.get('cdn'):
+        if result.get("cdn"):
            lines.append(f"CDN Detected:")
-            for cdn in result['cdn']:
+            for cdn in result["cdn"]:
                lines.append(f" - {cdn}")
            lines.append("")
 
         # Technologies
-        if result.get('technologies'):
+        if result.get("technologies"):
            lines.append(f"Technologies:")
-            for tech in result['technologies']:
+            for tech in result["technologies"]:
                lines.append(f" - {tech}")
            lines.append("")
 
-
+        # Robots.txt paths (discovered files/directories)
+        robots_paths = result.get("robots_paths", [])
+        if robots_paths:
+            lines.append("-" * 40)
+            lines.append(f"ROBOTS.TXT PATHS ({len(robots_paths)} found):")
+            for path in robots_paths[:20]:
+                lines.append(f" - {path}")
+            if len(robots_paths) > 20:
+                lines.append(f" ... and {len(robots_paths) - 20} more")
+            lines.append("-" * 40)
+            lines.append("")
+
+        # Sitemap URLs
+        sitemap_paths = result.get("sitemap_paths", [])
+        if sitemap_paths:
+            lines.append(f"SITEMAP URLS ({len(sitemap_paths)} found):")
+            for url in sitemap_paths[:10]:
+                lines.append(f" - {url}")
+            if len(sitemap_paths) > 10:
+                lines.append(f" ... and {len(sitemap_paths) - 10} more")
+            lines.append("")
+
+        lines.append(
+            f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ==="
+        )
 
-        return
+        return "\n".join(lines)
 
 
 plugin = HttpFingerprintPlugin()