souleyez 2.43.29__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- souleyez/__init__.py +1 -2
- souleyez/ai/__init__.py +21 -15
- souleyez/ai/action_mapper.py +249 -150
- souleyez/ai/chain_advisor.py +116 -100
- souleyez/ai/claude_provider.py +29 -28
- souleyez/ai/context_builder.py +80 -62
- souleyez/ai/executor.py +158 -117
- souleyez/ai/feedback_handler.py +136 -121
- souleyez/ai/llm_factory.py +27 -20
- souleyez/ai/llm_provider.py +4 -2
- souleyez/ai/ollama_provider.py +6 -9
- souleyez/ai/ollama_service.py +44 -37
- souleyez/ai/path_scorer.py +91 -76
- souleyez/ai/recommender.py +176 -144
- souleyez/ai/report_context.py +74 -73
- souleyez/ai/report_service.py +84 -66
- souleyez/ai/result_parser.py +222 -229
- souleyez/ai/safety.py +67 -44
- souleyez/auth/__init__.py +23 -22
- souleyez/auth/audit.py +36 -26
- souleyez/auth/engagement_access.py +65 -48
- souleyez/auth/permissions.py +14 -3
- souleyez/auth/session_manager.py +54 -37
- souleyez/auth/user_manager.py +109 -64
- souleyez/commands/audit.py +40 -43
- souleyez/commands/auth.py +35 -15
- souleyez/commands/deliverables.py +55 -50
- souleyez/commands/engagement.py +47 -28
- souleyez/commands/license.py +32 -23
- souleyez/commands/screenshots.py +36 -32
- souleyez/commands/user.py +82 -36
- souleyez/config.py +52 -44
- souleyez/core/credential_tester.py +87 -81
- souleyez/core/cve_mappings.py +179 -192
- souleyez/core/cve_matcher.py +162 -148
- souleyez/core/msf_auto_mapper.py +100 -83
- souleyez/core/msf_chain_engine.py +294 -256
- souleyez/core/msf_database.py +153 -70
- souleyez/core/msf_integration.py +679 -673
- souleyez/core/msf_rpc_client.py +40 -42
- souleyez/core/msf_rpc_manager.py +77 -79
- souleyez/core/msf_sync_manager.py +241 -181
- souleyez/core/network_utils.py +22 -15
- souleyez/core/parser_handler.py +34 -25
- souleyez/core/pending_chains.py +114 -63
- souleyez/core/templates.py +158 -107
- souleyez/core/tool_chaining.py +9564 -2881
- souleyez/core/version_utils.py +79 -94
- souleyez/core/vuln_correlation.py +136 -89
- souleyez/core/web_utils.py +33 -32
- souleyez/data/wordlists/ad_users.txt +378 -0
- souleyez/data/wordlists/api_endpoints_large.txt +769 -0
- souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
- souleyez/data/wordlists/lfi_payloads.txt +82 -0
- souleyez/data/wordlists/passwords_brute.txt +1548 -0
- souleyez/data/wordlists/passwords_crack.txt +2479 -0
- souleyez/data/wordlists/passwords_spray.txt +386 -0
- souleyez/data/wordlists/subdomains_large.txt +5057 -0
- souleyez/data/wordlists/usernames_common.txt +694 -0
- souleyez/data/wordlists/web_dirs_large.txt +4769 -0
- souleyez/detection/__init__.py +1 -1
- souleyez/detection/attack_signatures.py +12 -17
- souleyez/detection/mitre_mappings.py +61 -55
- souleyez/detection/validator.py +97 -86
- souleyez/devtools.py +23 -10
- souleyez/docs/README.md +4 -4
- souleyez/docs/api-reference/cli-commands.md +2 -2
- souleyez/docs/developer-guide/adding-new-tools.md +562 -0
- souleyez/docs/user-guide/auto-chaining.md +30 -8
- souleyez/docs/user-guide/getting-started.md +1 -1
- souleyez/docs/user-guide/installation.md +26 -3
- souleyez/docs/user-guide/metasploit-integration.md +2 -2
- souleyez/docs/user-guide/rbac.md +1 -1
- souleyez/docs/user-guide/scope-management.md +1 -1
- souleyez/docs/user-guide/siem-integration.md +1 -1
- souleyez/docs/user-guide/tools-reference.md +1 -8
- souleyez/docs/user-guide/worker-management.md +1 -1
- souleyez/engine/background.py +1239 -535
- souleyez/engine/base.py +4 -1
- souleyez/engine/job_status.py +17 -49
- souleyez/engine/log_sanitizer.py +103 -77
- souleyez/engine/manager.py +38 -7
- souleyez/engine/result_handler.py +2200 -1550
- souleyez/engine/worker_manager.py +50 -41
- souleyez/export/evidence_bundle.py +72 -62
- souleyez/feature_flags/features.py +16 -20
- souleyez/feature_flags.py +5 -9
- souleyez/handlers/__init__.py +11 -0
- souleyez/handlers/base.py +188 -0
- souleyez/handlers/bash_handler.py +277 -0
- souleyez/handlers/bloodhound_handler.py +243 -0
- souleyez/handlers/certipy_handler.py +311 -0
- souleyez/handlers/crackmapexec_handler.py +486 -0
- souleyez/handlers/dnsrecon_handler.py +344 -0
- souleyez/handlers/enum4linux_handler.py +400 -0
- souleyez/handlers/evil_winrm_handler.py +493 -0
- souleyez/handlers/ffuf_handler.py +815 -0
- souleyez/handlers/gobuster_handler.py +1114 -0
- souleyez/handlers/gpp_extract_handler.py +334 -0
- souleyez/handlers/hashcat_handler.py +444 -0
- souleyez/handlers/hydra_handler.py +564 -0
- souleyez/handlers/impacket_getuserspns_handler.py +343 -0
- souleyez/handlers/impacket_psexec_handler.py +222 -0
- souleyez/handlers/impacket_secretsdump_handler.py +426 -0
- souleyez/handlers/john_handler.py +286 -0
- souleyez/handlers/katana_handler.py +425 -0
- souleyez/handlers/kerbrute_handler.py +298 -0
- souleyez/handlers/ldapsearch_handler.py +636 -0
- souleyez/handlers/lfi_extract_handler.py +464 -0
- souleyez/handlers/msf_auxiliary_handler.py +409 -0
- souleyez/handlers/msf_exploit_handler.py +380 -0
- souleyez/handlers/nikto_handler.py +413 -0
- souleyez/handlers/nmap_handler.py +821 -0
- souleyez/handlers/nuclei_handler.py +359 -0
- souleyez/handlers/nxc_handler.py +417 -0
- souleyez/handlers/rdp_sec_check_handler.py +353 -0
- souleyez/handlers/registry.py +292 -0
- souleyez/handlers/responder_handler.py +232 -0
- souleyez/handlers/service_explorer_handler.py +434 -0
- souleyez/handlers/smbclient_handler.py +344 -0
- souleyez/handlers/smbmap_handler.py +510 -0
- souleyez/handlers/smbpasswd_handler.py +296 -0
- souleyez/handlers/sqlmap_handler.py +1116 -0
- souleyez/handlers/theharvester_handler.py +601 -0
- souleyez/handlers/web_login_test_handler.py +327 -0
- souleyez/handlers/whois_handler.py +277 -0
- souleyez/handlers/wpscan_handler.py +554 -0
- souleyez/history.py +32 -16
- souleyez/importers/msf_importer.py +106 -75
- souleyez/importers/smart_importer.py +208 -147
- souleyez/integrations/siem/__init__.py +10 -10
- souleyez/integrations/siem/base.py +17 -18
- souleyez/integrations/siem/elastic.py +108 -122
- souleyez/integrations/siem/factory.py +207 -80
- souleyez/integrations/siem/googlesecops.py +146 -154
- souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
- souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
- souleyez/integrations/siem/sentinel.py +107 -109
- souleyez/integrations/siem/splunk.py +246 -212
- souleyez/integrations/siem/wazuh.py +65 -71
- souleyez/integrations/wazuh/__init__.py +5 -5
- souleyez/integrations/wazuh/client.py +70 -93
- souleyez/integrations/wazuh/config.py +85 -57
- souleyez/integrations/wazuh/host_mapper.py +28 -36
- souleyez/integrations/wazuh/sync.py +78 -68
- souleyez/intelligence/__init__.py +4 -5
- souleyez/intelligence/correlation_analyzer.py +309 -295
- souleyez/intelligence/exploit_knowledge.py +661 -623
- souleyez/intelligence/exploit_suggestions.py +159 -139
- souleyez/intelligence/gap_analyzer.py +132 -97
- souleyez/intelligence/gap_detector.py +251 -214
- souleyez/intelligence/sensitive_tables.py +266 -129
- souleyez/intelligence/service_parser.py +137 -123
- souleyez/intelligence/surface_analyzer.py +407 -268
- souleyez/intelligence/target_parser.py +159 -162
- souleyez/licensing/__init__.py +6 -6
- souleyez/licensing/validator.py +17 -19
- souleyez/log_config.py +79 -54
- souleyez/main.py +1505 -687
- souleyez/migrations/fix_job_counter.py +16 -14
- souleyez/parsers/bloodhound_parser.py +41 -39
- souleyez/parsers/crackmapexec_parser.py +178 -111
- souleyez/parsers/dalfox_parser.py +72 -77
- souleyez/parsers/dnsrecon_parser.py +103 -91
- souleyez/parsers/enum4linux_parser.py +183 -153
- souleyez/parsers/ffuf_parser.py +29 -25
- souleyez/parsers/gobuster_parser.py +301 -41
- souleyez/parsers/hashcat_parser.py +324 -79
- souleyez/parsers/http_fingerprint_parser.py +350 -103
- souleyez/parsers/hydra_parser.py +131 -111
- souleyez/parsers/impacket_parser.py +231 -178
- souleyez/parsers/john_parser.py +98 -86
- souleyez/parsers/katana_parser.py +316 -0
- souleyez/parsers/msf_parser.py +943 -498
- souleyez/parsers/nikto_parser.py +346 -65
- souleyez/parsers/nmap_parser.py +262 -174
- souleyez/parsers/nuclei_parser.py +40 -44
- souleyez/parsers/responder_parser.py +26 -26
- souleyez/parsers/searchsploit_parser.py +74 -74
- souleyez/parsers/service_explorer_parser.py +279 -0
- souleyez/parsers/smbmap_parser.py +180 -124
- souleyez/parsers/sqlmap_parser.py +434 -308
- souleyez/parsers/theharvester_parser.py +75 -57
- souleyez/parsers/whois_parser.py +135 -94
- souleyez/parsers/wpscan_parser.py +278 -190
- souleyez/plugins/afp.py +44 -36
- souleyez/plugins/afp_brute.py +114 -46
- souleyez/plugins/ard.py +48 -37
- souleyez/plugins/bloodhound.py +95 -61
- souleyez/plugins/certipy.py +303 -0
- souleyez/plugins/crackmapexec.py +186 -85
- souleyez/plugins/dalfox.py +120 -59
- souleyez/plugins/dns_hijack.py +146 -41
- souleyez/plugins/dnsrecon.py +97 -61
- souleyez/plugins/enum4linux.py +91 -66
- souleyez/plugins/evil_winrm.py +291 -0
- souleyez/plugins/ffuf.py +166 -90
- souleyez/plugins/firmware_extract.py +133 -29
- souleyez/plugins/gobuster.py +387 -190
- souleyez/plugins/gpp_extract.py +393 -0
- souleyez/plugins/hashcat.py +100 -73
- souleyez/plugins/http_fingerprint.py +913 -267
- souleyez/plugins/hydra.py +566 -200
- souleyez/plugins/impacket_getnpusers.py +117 -69
- souleyez/plugins/impacket_psexec.py +84 -64
- souleyez/plugins/impacket_secretsdump.py +103 -69
- souleyez/plugins/impacket_smbclient.py +89 -75
- souleyez/plugins/john.py +86 -69
- souleyez/plugins/katana.py +313 -0
- souleyez/plugins/kerbrute.py +237 -0
- souleyez/plugins/lfi_extract.py +541 -0
- souleyez/plugins/macos_ssh.py +117 -48
- souleyez/plugins/mdns.py +35 -30
- souleyez/plugins/msf_auxiliary.py +253 -130
- souleyez/plugins/msf_exploit.py +239 -161
- souleyez/plugins/nikto.py +134 -78
- souleyez/plugins/nmap.py +275 -91
- souleyez/plugins/nuclei.py +180 -89
- souleyez/plugins/nxc.py +285 -0
- souleyez/plugins/plugin_base.py +35 -36
- souleyez/plugins/plugin_template.py +13 -5
- souleyez/plugins/rdp_sec_check.py +130 -0
- souleyez/plugins/responder.py +112 -71
- souleyez/plugins/router_http_brute.py +76 -65
- souleyez/plugins/router_ssh_brute.py +118 -41
- souleyez/plugins/router_telnet_brute.py +124 -42
- souleyez/plugins/routersploit.py +91 -59
- souleyez/plugins/routersploit_exploit.py +77 -55
- souleyez/plugins/searchsploit.py +91 -77
- souleyez/plugins/service_explorer.py +1160 -0
- souleyez/plugins/smbmap.py +122 -72
- souleyez/plugins/smbpasswd.py +215 -0
- souleyez/plugins/sqlmap.py +301 -113
- souleyez/plugins/theharvester.py +127 -75
- souleyez/plugins/tr069.py +79 -57
- souleyez/plugins/upnp.py +65 -47
- souleyez/plugins/upnp_abuse.py +73 -55
- souleyez/plugins/vnc_access.py +129 -42
- souleyez/plugins/vnc_brute.py +109 -38
- souleyez/plugins/web_login_test.py +417 -0
- souleyez/plugins/whois.py +77 -58
- souleyez/plugins/wpscan.py +219 -69
- souleyez/reporting/__init__.py +2 -1
- souleyez/reporting/attack_chain.py +411 -346
- souleyez/reporting/charts.py +436 -501
- souleyez/reporting/compliance_mappings.py +334 -201
- souleyez/reporting/detection_report.py +126 -125
- souleyez/reporting/formatters.py +828 -591
- souleyez/reporting/generator.py +386 -302
- souleyez/reporting/metrics.py +72 -75
- souleyez/scanner.py +35 -29
- souleyez/security/__init__.py +37 -11
- souleyez/security/scope_validator.py +175 -106
- souleyez/security/validation.py +237 -149
- souleyez/security.py +22 -6
- souleyez/storage/credentials.py +247 -186
- souleyez/storage/crypto.py +296 -129
- souleyez/storage/database.py +73 -50
- souleyez/storage/db.py +58 -36
- souleyez/storage/deliverable_evidence.py +177 -128
- souleyez/storage/deliverable_exporter.py +282 -246
- souleyez/storage/deliverable_templates.py +134 -116
- souleyez/storage/deliverables.py +135 -130
- souleyez/storage/engagements.py +109 -56
- souleyez/storage/evidence.py +181 -152
- souleyez/storage/execution_log.py +31 -17
- souleyez/storage/exploit_attempts.py +93 -57
- souleyez/storage/exploits.py +67 -36
- souleyez/storage/findings.py +48 -61
- souleyez/storage/hosts.py +176 -144
- souleyez/storage/migrate_to_engagements.py +43 -19
- souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
- souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
- souleyez/storage/migrations/_003_add_execution_log.py +14 -8
- souleyez/storage/migrations/_005_screenshots.py +13 -5
- souleyez/storage/migrations/_006_deliverables.py +13 -5
- souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
- souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
- souleyez/storage/migrations/_010_evidence_linking.py +17 -10
- souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
- souleyez/storage/migrations/_012_team_collaboration.py +34 -21
- souleyez/storage/migrations/_013_add_host_tags.py +12 -6
- souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
- souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
- souleyez/storage/migrations/_016_add_domain_field.py +10 -4
- souleyez/storage/migrations/_017_msf_sessions.py +16 -8
- souleyez/storage/migrations/_018_add_osint_target.py +10 -6
- souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
- souleyez/storage/migrations/_020_add_rbac.py +36 -15
- souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
- souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
- souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
- souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
- souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
- souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
- souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
- souleyez/storage/migrations/__init__.py +26 -26
- souleyez/storage/migrations/migration_manager.py +19 -19
- souleyez/storage/msf_sessions.py +100 -65
- souleyez/storage/osint.py +17 -24
- souleyez/storage/recommendation_engine.py +269 -235
- souleyez/storage/screenshots.py +33 -32
- souleyez/storage/smb_shares.py +136 -92
- souleyez/storage/sqlmap_data.py +183 -128
- souleyez/storage/team_collaboration.py +135 -141
- souleyez/storage/timeline_tracker.py +122 -94
- souleyez/storage/wazuh_vulns.py +64 -66
- souleyez/storage/web_paths.py +33 -37
- souleyez/testing/credential_tester.py +221 -205
- souleyez/ui/__init__.py +1 -1
- souleyez/ui/ai_quotes.py +12 -12
- souleyez/ui/attack_surface.py +2439 -1516
- souleyez/ui/chain_rules_view.py +914 -382
- souleyez/ui/correlation_view.py +312 -230
- souleyez/ui/dashboard.py +2382 -1130
- souleyez/ui/deliverables_view.py +148 -62
- souleyez/ui/design_system.py +13 -13
- souleyez/ui/errors.py +49 -49
- souleyez/ui/evidence_linking_view.py +284 -179
- souleyez/ui/evidence_vault.py +393 -285
- souleyez/ui/exploit_suggestions_view.py +555 -349
- souleyez/ui/export_view.py +100 -66
- souleyez/ui/gap_analysis_view.py +315 -171
- souleyez/ui/help_system.py +105 -97
- souleyez/ui/intelligence_view.py +436 -293
- souleyez/ui/interactive.py +23034 -10679
- souleyez/ui/interactive_selector.py +75 -68
- souleyez/ui/log_formatter.py +47 -39
- souleyez/ui/menu_components.py +22 -13
- souleyez/ui/msf_auxiliary_menu.py +184 -133
- souleyez/ui/pending_chains_view.py +336 -172
- souleyez/ui/progress_indicators.py +5 -3
- souleyez/ui/recommendations_view.py +195 -137
- souleyez/ui/rule_builder.py +343 -225
- souleyez/ui/setup_wizard.py +678 -284
- souleyez/ui/shortcuts.py +217 -165
- souleyez/ui/splunk_gap_analysis_view.py +452 -270
- souleyez/ui/splunk_vulns_view.py +139 -86
- souleyez/ui/team_dashboard.py +498 -335
- souleyez/ui/template_selector.py +196 -105
- souleyez/ui/terminal.py +6 -6
- souleyez/ui/timeline_view.py +198 -127
- souleyez/ui/tool_setup.py +264 -164
- souleyez/ui/tutorial.py +202 -72
- souleyez/ui/tutorial_state.py +40 -40
- souleyez/ui/wazuh_vulns_view.py +235 -141
- souleyez/ui/wordlist_browser.py +260 -107
- souleyez/ui.py +464 -312
- souleyez/utils/tool_checker.py +427 -367
- souleyez/utils.py +33 -29
- souleyez/wordlists.py +134 -167
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
- souleyez-3.0.0.dist-info/RECORD +443 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
- souleyez-2.43.29.dist-info/RECORD +0 -379
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
|
@@ -47,102 +47,120 @@ def parse_theharvester_output(output: str, target: str = "") -> Dict[str, Any]:
|
|
|
47
47
|
}
|
|
48
48
|
"""
|
|
49
49
|
result = {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
50
|
+
"target": target,
|
|
51
|
+
"emails": [],
|
|
52
|
+
"hosts": [],
|
|
53
|
+
"ips": [],
|
|
54
|
+
"urls": [],
|
|
55
|
+
"asns": [],
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
lines = output.split(
|
|
58
|
+
lines = output.split("\n")
|
|
59
59
|
current_section = None
|
|
60
60
|
|
|
61
61
|
for line in lines:
|
|
62
62
|
line = line.strip()
|
|
63
63
|
|
|
64
64
|
# Detect target
|
|
65
|
-
if line.startswith(
|
|
66
|
-
target_match = re.search(r
|
|
65
|
+
if line.startswith("[*] Target:"):
|
|
66
|
+
target_match = re.search(r"\[?\*\]?\s*Target:\s*(\S+)", line)
|
|
67
67
|
if target_match:
|
|
68
|
-
result[
|
|
68
|
+
result["target"] = target_match.group(1)
|
|
69
69
|
|
|
70
70
|
# Detect section headers (case-insensitive, multiple format variations)
|
|
71
71
|
line_lower = line.lower()
|
|
72
|
-
if any(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
current_section =
|
|
76
|
-
elif any(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
current_section =
|
|
80
|
-
elif any(x in line_lower for x in [
|
|
81
|
-
current_section =
|
|
82
|
-
elif any(
|
|
83
|
-
|
|
72
|
+
if any(
|
|
73
|
+
x in line_lower for x in ["asns found", "asn found", "autonomous system"]
|
|
74
|
+
):
|
|
75
|
+
current_section = "asns"
|
|
76
|
+
elif any(
|
|
77
|
+
x in line_lower for x in ["urls found", "interesting urls", "url found"]
|
|
78
|
+
):
|
|
79
|
+
current_section = "urls"
|
|
80
|
+
elif any(x in line_lower for x in ["ips found", "ip found", "ip addresses"]):
|
|
81
|
+
current_section = "ips"
|
|
82
|
+
elif any(
|
|
83
|
+
x in line_lower for x in ["emails found", "email found", "email addresses"]
|
|
84
|
+
):
|
|
85
|
+
current_section = "emails"
|
|
86
|
+
elif any(
|
|
87
|
+
x in line_lower
|
|
88
|
+
for x in [
|
|
89
|
+
"hosts found",
|
|
90
|
+
"host found",
|
|
91
|
+
"subdomains found",
|
|
92
|
+
"subdomain found",
|
|
93
|
+
]
|
|
94
|
+
):
|
|
95
|
+
current_section = "hosts"
|
|
96
|
+
elif any(
|
|
97
|
+
x in line_lower for x in ["people found", "no people found", "linkedin"]
|
|
98
|
+
):
|
|
99
|
+
current_section = "people" # We'll skip this for now
|
|
84
100
|
|
|
85
101
|
# Skip separator lines and empty lines
|
|
86
|
-
elif line.startswith(
|
|
102
|
+
elif line.startswith("---") or not line:
|
|
87
103
|
continue
|
|
88
104
|
|
|
89
105
|
# Skip "No X found" messages
|
|
90
|
-
elif
|
|
106
|
+
elif "[*] No" in line:
|
|
91
107
|
current_section = None
|
|
92
108
|
continue
|
|
93
109
|
|
|
94
110
|
# Skip header/banner lines
|
|
95
|
-
elif line.startswith(
|
|
111
|
+
elif line.startswith("*") or line.startswith("[*] Searching"):
|
|
96
112
|
continue
|
|
97
113
|
|
|
98
114
|
# Parse data based on current section
|
|
99
|
-
elif current_section ==
|
|
115
|
+
elif current_section == "asns":
|
|
100
116
|
# ASN format: AS12345
|
|
101
|
-
if line.startswith(
|
|
102
|
-
result[
|
|
117
|
+
if line.startswith("AS") and line[2:].isdigit():
|
|
118
|
+
result["asns"].append(line)
|
|
103
119
|
|
|
104
|
-
elif current_section ==
|
|
120
|
+
elif current_section == "urls":
|
|
105
121
|
# URL format: http(s)://...
|
|
106
|
-
if line.startswith(
|
|
122
|
+
if line.startswith("http://") or line.startswith("https://"):
|
|
107
123
|
# Clean up trailing punctuation
|
|
108
|
-
url = line.rstrip(
|
|
109
|
-
if url not in result[
|
|
110
|
-
result[
|
|
124
|
+
url = line.rstrip(".,;)")
|
|
125
|
+
if url not in result["urls"]:
|
|
126
|
+
result["urls"].append(url)
|
|
111
127
|
|
|
112
|
-
elif current_section ==
|
|
128
|
+
elif current_section == "ips":
|
|
113
129
|
# IP format: N.N.N.N
|
|
114
|
-
if re.match(r
|
|
115
|
-
if line not in result[
|
|
116
|
-
result[
|
|
130
|
+
if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
|
|
131
|
+
if line not in result["ips"]:
|
|
132
|
+
result["ips"].append(line)
|
|
117
133
|
|
|
118
|
-
elif current_section ==
|
|
134
|
+
elif current_section == "emails":
|
|
119
135
|
# Email format: user@domain
|
|
120
|
-
if
|
|
136
|
+
if "@" in line and "." in line:
|
|
121
137
|
# More permissive email validation (supports international domains)
|
|
122
138
|
# Pattern allows: standard emails, plus-addressing, dots, underscores
|
|
123
139
|
email = line.strip().lower()
|
|
124
140
|
# Remove any leading/trailing brackets or quotes
|
|
125
|
-
email = re.sub(r
|
|
126
|
-
if re.match(
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
141
|
+
email = re.sub(r"^[\[\(<\'\"]+|[\]\)>\'\"]$", "", email)
|
|
142
|
+
if re.match(
|
|
143
|
+
r"^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$", email
|
|
144
|
+
):
|
|
145
|
+
if email not in result["emails"]:
|
|
146
|
+
result["emails"].append(email)
|
|
147
|
+
|
|
148
|
+
elif current_section == "hosts":
|
|
131
149
|
# Host format: subdomain.domain.tld
|
|
132
|
-
if
|
|
150
|
+
if "." in line and not line.startswith("http"):
|
|
133
151
|
# Clean and validate hostname
|
|
134
152
|
host = line.strip().lower()
|
|
135
153
|
# Remove any leading/trailing brackets, quotes, or trailing dots
|
|
136
|
-
host = re.sub(r
|
|
154
|
+
host = re.sub(r"^[\[\(<\'\"]+|[\]\)>\'\".]+$", "", host)
|
|
137
155
|
# More permissive validation: allows underscores (common in some hosts)
|
|
138
156
|
# and longer TLDs (some are 4+ chars)
|
|
139
|
-
if re.match(r
|
|
140
|
-
if host not in result[
|
|
141
|
-
result[
|
|
157
|
+
if re.match(r"^[a-zA-Z0-9._-]+\.[a-zA-Z]{2,}$", host) and len(host) > 3:
|
|
158
|
+
if host not in result["hosts"]:
|
|
159
|
+
result["hosts"].append(host)
|
|
142
160
|
|
|
143
161
|
# Add alias fields for backward compatibility with display code
|
|
144
|
-
result[
|
|
145
|
-
result[
|
|
162
|
+
result["subdomains"] = result["hosts"] # Alias for display
|
|
163
|
+
result["base_urls"] = result["urls"] # Alias for display
|
|
146
164
|
|
|
147
165
|
return result
|
|
148
166
|
|
|
@@ -158,9 +176,9 @@ def get_osint_stats(parsed: Dict[str, Any]) -> Dict[str, int]:
|
|
|
158
176
|
Dict with counts: {'emails': 5, 'hosts': 10, ...}
|
|
159
177
|
"""
|
|
160
178
|
return {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
179
|
+
"emails": len(parsed.get("emails", [])),
|
|
180
|
+
"hosts": len(parsed.get("hosts", [])),
|
|
181
|
+
"ips": len(parsed.get("ips", [])),
|
|
182
|
+
"urls": len(parsed.get("urls", [])),
|
|
183
|
+
"asns": len(parsed.get("asns", [])),
|
|
166
184
|
}
|
souleyez/parsers/whois_parser.py
CHANGED
|
@@ -51,123 +51,149 @@ def parse_whois_output(output: str, target: str = "") -> Dict[str, Any]:
|
|
|
51
51
|
}
|
|
52
52
|
"""
|
|
53
53
|
result = {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
54
|
+
"domain": target,
|
|
55
|
+
"registrar": None,
|
|
56
|
+
"registrant": {},
|
|
57
|
+
"admin_contact": {},
|
|
58
|
+
"tech_contact": {},
|
|
59
|
+
"dates": {},
|
|
60
|
+
"nameservers": [],
|
|
61
|
+
"status": [],
|
|
62
|
+
"dnssec": None,
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
-
lines = output.split(
|
|
65
|
+
lines = output.split("\n")
|
|
66
66
|
current_section = None
|
|
67
67
|
|
|
68
68
|
for line in lines:
|
|
69
69
|
line_stripped = line.strip()
|
|
70
70
|
|
|
71
71
|
# Skip comments and empty lines
|
|
72
|
-
if
|
|
72
|
+
if (
|
|
73
|
+
not line_stripped
|
|
74
|
+
or line_stripped.startswith("%")
|
|
75
|
+
or line_stripped.startswith("#")
|
|
76
|
+
):
|
|
73
77
|
continue
|
|
74
78
|
|
|
75
79
|
# Convert to lowercase for matching
|
|
76
80
|
line_lower = line_stripped.lower()
|
|
77
81
|
|
|
78
82
|
# Extract domain name
|
|
79
|
-
if not result[
|
|
80
|
-
domain_match = re.match(
|
|
83
|
+
if not result["domain"] or result["domain"] == "":
|
|
84
|
+
domain_match = re.match(
|
|
85
|
+
r"domain name:\s+(.+)", line_stripped, re.IGNORECASE
|
|
86
|
+
)
|
|
81
87
|
if domain_match:
|
|
82
|
-
result[
|
|
88
|
+
result["domain"] = domain_match.group(1).strip()
|
|
83
89
|
|
|
84
90
|
# Extract registrar
|
|
85
|
-
if
|
|
86
|
-
registrar_match = re.search(
|
|
91
|
+
if "registrar:" in line_lower and not result["registrar"]:
|
|
92
|
+
registrar_match = re.search(
|
|
93
|
+
r"registrar:\s+(.+)", line_stripped, re.IGNORECASE
|
|
94
|
+
)
|
|
87
95
|
if registrar_match:
|
|
88
|
-
result[
|
|
96
|
+
result["registrar"] = registrar_match.group(1).strip()
|
|
89
97
|
|
|
90
98
|
# Extract dates
|
|
91
|
-
if
|
|
92
|
-
date_match = re.search(
|
|
99
|
+
if "creation date" in line_lower or "created" in line_lower:
|
|
100
|
+
date_match = re.search(
|
|
101
|
+
r":\s+(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4})", line_stripped
|
|
102
|
+
)
|
|
93
103
|
if date_match:
|
|
94
|
-
result[
|
|
95
|
-
|
|
96
|
-
if
|
|
97
|
-
|
|
104
|
+
result["dates"]["created"] = date_match.group(1).strip()
|
|
105
|
+
|
|
106
|
+
if (
|
|
107
|
+
"updated date" in line_lower
|
|
108
|
+
or "last updated" in line_lower
|
|
109
|
+
or "modified" in line_lower
|
|
110
|
+
):
|
|
111
|
+
date_match = re.search(
|
|
112
|
+
r":\s+(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4})", line_stripped
|
|
113
|
+
)
|
|
98
114
|
if date_match:
|
|
99
|
-
result[
|
|
115
|
+
result["dates"]["updated"] = date_match.group(1).strip()
|
|
100
116
|
|
|
101
|
-
if
|
|
102
|
-
date_match = re.search(
|
|
117
|
+
if "expir" in line_lower:
|
|
118
|
+
date_match = re.search(
|
|
119
|
+
r":\s+(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4})", line_stripped
|
|
120
|
+
)
|
|
103
121
|
if date_match:
|
|
104
|
-
result[
|
|
122
|
+
result["dates"]["expires"] = date_match.group(1).strip()
|
|
105
123
|
|
|
106
124
|
# Detect contact sections
|
|
107
|
-
if
|
|
108
|
-
current_section =
|
|
109
|
-
elif
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
current_section =
|
|
125
|
+
if "registrant" in line_lower and "name:" in line_lower:
|
|
126
|
+
current_section = "registrant"
|
|
127
|
+
elif "admin" in line_lower and (
|
|
128
|
+
"name:" in line_lower or "contact" in line_lower
|
|
129
|
+
):
|
|
130
|
+
current_section = "admin"
|
|
131
|
+
elif "tech" in line_lower and (
|
|
132
|
+
"name:" in line_lower or "contact" in line_lower
|
|
133
|
+
):
|
|
134
|
+
current_section = "tech"
|
|
113
135
|
|
|
114
136
|
# Extract contact information based on current section
|
|
115
137
|
if current_section:
|
|
116
138
|
contact_dict = _get_contact_dict(result, current_section)
|
|
117
139
|
|
|
118
|
-
if
|
|
119
|
-
name_match = re.search(r
|
|
140
|
+
if "name:" in line_lower and "domain name" not in line_lower:
|
|
141
|
+
name_match = re.search(r"name:\s+(.+)", line_stripped, re.IGNORECASE)
|
|
120
142
|
if name_match:
|
|
121
|
-
contact_dict[
|
|
143
|
+
contact_dict["name"] = name_match.group(1).strip()
|
|
122
144
|
|
|
123
|
-
if
|
|
124
|
-
org_match = re.search(
|
|
145
|
+
if "organi" in line_lower:
|
|
146
|
+
org_match = re.search(
|
|
147
|
+
r"organi[zs]ation:\s+(.+)", line_stripped, re.IGNORECASE
|
|
148
|
+
)
|
|
125
149
|
if org_match:
|
|
126
|
-
contact_dict[
|
|
150
|
+
contact_dict["organization"] = org_match.group(1).strip()
|
|
127
151
|
|
|
128
|
-
if
|
|
129
|
-
email_match = re.search(r
|
|
152
|
+
if "email" in line_lower:
|
|
153
|
+
email_match = re.search(r"email:\s+(.+)", line_stripped, re.IGNORECASE)
|
|
130
154
|
if email_match:
|
|
131
|
-
contact_dict[
|
|
155
|
+
contact_dict["email"] = email_match.group(1).strip()
|
|
132
156
|
|
|
133
|
-
if
|
|
134
|
-
phone_match = re.search(r
|
|
157
|
+
if "phone" in line_lower:
|
|
158
|
+
phone_match = re.search(r"phone:\s+(.+)", line_stripped, re.IGNORECASE)
|
|
135
159
|
if phone_match:
|
|
136
|
-
contact_dict[
|
|
160
|
+
contact_dict["phone"] = phone_match.group(1).strip()
|
|
137
161
|
|
|
138
162
|
# Extract nameservers
|
|
139
|
-
if
|
|
140
|
-
ns_match = re.search(
|
|
163
|
+
if "name server" in line_lower or "nserver" in line_lower:
|
|
164
|
+
ns_match = re.search(
|
|
165
|
+
r"(?:name server|nserver):\s+(.+)", line_stripped, re.IGNORECASE
|
|
166
|
+
)
|
|
141
167
|
if ns_match:
|
|
142
168
|
nameserver = ns_match.group(1).strip().lower()
|
|
143
|
-
if nameserver not in result[
|
|
144
|
-
result[
|
|
169
|
+
if nameserver not in result["nameservers"]:
|
|
170
|
+
result["nameservers"].append(nameserver)
|
|
145
171
|
|
|
146
172
|
# Extract status
|
|
147
|
-
if
|
|
148
|
-
status_match = re.search(r
|
|
173
|
+
if "status:" in line_lower:
|
|
174
|
+
status_match = re.search(r"status:\s+(.+)", line_stripped, re.IGNORECASE)
|
|
149
175
|
if status_match:
|
|
150
176
|
status = status_match.group(1).strip()
|
|
151
|
-
if status not in result[
|
|
152
|
-
result[
|
|
177
|
+
if status not in result["status"]:
|
|
178
|
+
result["status"].append(status)
|
|
153
179
|
|
|
154
180
|
# Extract DNSSEC
|
|
155
|
-
if
|
|
156
|
-
dnssec_match = re.search(r
|
|
181
|
+
if "dnssec:" in line_lower:
|
|
182
|
+
dnssec_match = re.search(r"dnssec:\s+(.+)", line_stripped, re.IGNORECASE)
|
|
157
183
|
if dnssec_match:
|
|
158
|
-
result[
|
|
184
|
+
result["dnssec"] = dnssec_match.group(1).strip()
|
|
159
185
|
|
|
160
186
|
return result
|
|
161
187
|
|
|
162
188
|
|
|
163
189
|
def _get_contact_dict(result: Dict[str, Any], section: str) -> Dict[str, Any]:
|
|
164
190
|
"""Get the appropriate contact dictionary based on section."""
|
|
165
|
-
if section ==
|
|
166
|
-
return result[
|
|
167
|
-
elif section ==
|
|
168
|
-
return result[
|
|
169
|
-
elif section ==
|
|
170
|
-
return result[
|
|
191
|
+
if section == "registrant":
|
|
192
|
+
return result["registrant"]
|
|
193
|
+
elif section == "admin":
|
|
194
|
+
return result["admin_contact"]
|
|
195
|
+
elif section == "tech":
|
|
196
|
+
return result["tech_contact"]
|
|
171
197
|
return {}
|
|
172
198
|
|
|
173
199
|
|
|
@@ -183,17 +209,21 @@ def extract_emails(parsed_data: Dict[str, Any]) -> List[str]:
|
|
|
183
209
|
"""
|
|
184
210
|
emails = []
|
|
185
211
|
|
|
186
|
-
for contact in [
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
212
|
+
for contact in [
|
|
213
|
+
parsed_data.get("registrant", {}),
|
|
214
|
+
parsed_data.get("admin_contact", {}),
|
|
215
|
+
parsed_data.get("tech_contact", {}),
|
|
216
|
+
]:
|
|
217
|
+
email = contact.get("email")
|
|
218
|
+
if email and email not in emails and "@" in email:
|
|
191
219
|
emails.append(email)
|
|
192
220
|
|
|
193
221
|
return emails
|
|
194
222
|
|
|
195
223
|
|
|
196
|
-
def map_to_osint_data(
|
|
224
|
+
def map_to_osint_data(
|
|
225
|
+
parsed_data: Dict[str, Any], engagement_id: int
|
|
226
|
+
) -> Dict[str, Any]:
|
|
197
227
|
"""
|
|
198
228
|
Convert parsed WHOIS data into OSINT record for database storage.
|
|
199
229
|
|
|
@@ -209,36 +239,42 @@ def map_to_osint_data(parsed_data: Dict[str, Any], engagement_id: int) -> Dict[s
|
|
|
209
239
|
# Extract key information for quick reference
|
|
210
240
|
summary_parts = []
|
|
211
241
|
|
|
212
|
-
if parsed_data.get(
|
|
242
|
+
if parsed_data.get("registrar"):
|
|
213
243
|
summary_parts.append(f"Registrar: {parsed_data['registrar']}")
|
|
214
244
|
|
|
215
|
-
if parsed_data.get(
|
|
245
|
+
if parsed_data.get("dates", {}).get("created"):
|
|
216
246
|
summary_parts.append(f"Created: {parsed_data['dates']['created']}")
|
|
217
247
|
|
|
218
|
-
if parsed_data.get(
|
|
248
|
+
if parsed_data.get("dates", {}).get("expires"):
|
|
219
249
|
summary_parts.append(f"Expires: {parsed_data['dates']['expires']}")
|
|
220
250
|
|
|
221
|
-
if parsed_data.get(
|
|
251
|
+
if parsed_data.get("registrant", {}).get("organization"):
|
|
222
252
|
summary_parts.append(f"Org: {parsed_data['registrant']['organization']}")
|
|
223
253
|
|
|
224
|
-
summary =
|
|
254
|
+
summary = (
|
|
255
|
+
" | ".join(summary_parts)
|
|
256
|
+
if summary_parts
|
|
257
|
+
else "Domain registration information"
|
|
258
|
+
)
|
|
225
259
|
|
|
226
260
|
# Extract emails for OSINT correlation
|
|
227
261
|
emails = extract_emails(parsed_data)
|
|
228
262
|
|
|
229
263
|
osint_record = {
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
264
|
+
"target": parsed_data.get("domain", ""),
|
|
265
|
+
"data_type": "domain_info",
|
|
266
|
+
"source": "whois",
|
|
267
|
+
"content": json.dumps(parsed_data, indent=2),
|
|
268
|
+
"summary": summary,
|
|
269
|
+
"metadata": json.dumps(
|
|
270
|
+
{
|
|
271
|
+
"registrar": parsed_data.get("registrar"),
|
|
272
|
+
"nameservers": parsed_data.get("nameservers", []),
|
|
273
|
+
"emails": emails,
|
|
274
|
+
"expiration_date": parsed_data.get("dates", {}).get("expires"),
|
|
275
|
+
"dnssec": parsed_data.get("dnssec"),
|
|
276
|
+
}
|
|
277
|
+
),
|
|
242
278
|
}
|
|
243
279
|
|
|
244
280
|
return osint_record
|
|
@@ -255,23 +291,23 @@ def check_privacy_protection(parsed_data: Dict[str, Any]) -> bool:
|
|
|
255
291
|
True if privacy protection detected
|
|
256
292
|
"""
|
|
257
293
|
privacy_indicators = [
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
294
|
+
"privacy",
|
|
295
|
+
"redacted",
|
|
296
|
+
"protected",
|
|
297
|
+
"whoisguard",
|
|
298
|
+
"domains by proxy",
|
|
299
|
+
"private registration",
|
|
264
300
|
]
|
|
265
301
|
|
|
266
302
|
# Check registrant info
|
|
267
|
-
registrant_text = str(parsed_data.get(
|
|
303
|
+
registrant_text = str(parsed_data.get("registrant", {})).lower()
|
|
268
304
|
|
|
269
305
|
for indicator in privacy_indicators:
|
|
270
306
|
if indicator in registrant_text:
|
|
271
307
|
return True
|
|
272
308
|
|
|
273
309
|
# Check registrar
|
|
274
|
-
registrar = str(parsed_data.get(
|
|
310
|
+
registrar = str(parsed_data.get("registrar", "")).lower()
|
|
275
311
|
for indicator in privacy_indicators:
|
|
276
312
|
if indicator in registrar:
|
|
277
313
|
return True
|
|
@@ -280,4 +316,9 @@ def check_privacy_protection(parsed_data: Dict[str, Any]) -> bool:
|
|
|
280
316
|
|
|
281
317
|
|
|
282
318
|
# Export the main functions
|
|
283
|
-
__all__ = [
|
|
319
|
+
__all__ = [
|
|
320
|
+
"parse_whois_output",
|
|
321
|
+
"map_to_osint_data",
|
|
322
|
+
"extract_emails",
|
|
323
|
+
"check_privacy_protection",
|
|
324
|
+
]
|