souleyez 2.43.29-py3-none-any.whl → 2.43.34-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9526 -2879
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +563 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +408 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +371 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +854 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +173 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +223 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +22827 -10678
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/METADATA +1 -1
  353. souleyez-2.43.34.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-2.43.34.dist-info}/top_level.txt +0 -0
souleyez/handlers/gobuster_handler.py
@@ -0,0 +1,1114 @@
+ #!/usr/bin/env python3
+ """
+ Gobuster handler.
+ 
+ Consolidates parsing and display logic for Gobuster directory enumeration jobs.
+ """
+ import logging
+ import os
+ import re
+ import ssl
+ import socket
+ import urllib.request
+ import urllib.error
+ from typing import Any, Dict, List, Optional
+ from urllib.parse import urlparse, urljoin
+ import defusedxml.ElementTree as ElementTree  # Safe XML parsing
+ 
+ import click
+ 
+ from souleyez.engine.job_status import (
+     STATUS_DONE,
+     STATUS_ERROR,
+     STATUS_NO_RESULTS,
+     STATUS_WARNING,
+ )
+ from souleyez.handlers.base import BaseToolHandler
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class GobusterHandler(BaseToolHandler):
+     """Handler for Gobuster directory enumeration jobs."""
+ 
+     tool_name = "gobuster"
+     display_name = "Gobuster"
+ 
+     # All handlers enabled
+     has_error_handler = True
+     has_warning_handler = True
+     has_no_results_handler = True
+     has_done_handler = True
+ 
+     # Security concern patterns for sensitive path detection
+     SECURITY_CONCERN_PATTERNS = {
+         "home_directory": {
+             "patterns": [
+                 r"\.bashrc$",
+                 r"\.profile$",
+                 r"\.bash_profile$",
+                 r"\.bash_history$",
+                 r"\.zshrc$",
+             ],
+             "label": "Home directory exposed (misconfigured web root)",
+             "severity": "critical",
+         },
+         "database_files": {
+             "patterns": [
+                 r"\.sql$",
+                 r"\.db$",
+                 r"\.mdb$",
+                 r"\.sqlite",
+                 r"/db\.",
+                 r"/database\.",
+                 r"\.bak\.sql",
+             ],
+             "label": "Database file exposed",
+             "severity": "high",
+         },
+         "backup_files": {
+             "patterns": [
+                 r"\.bak$",
+                 r"\.old$",
+                 r"\.backup$",
+                 r"\.orig$",
+                 r"\.save$",
+                 r"\.swp$",
+                 r"~$",
+                 r"\.zip$",
+                 r"\.tar",
+                 r"\.gz$",
+                 r"\.rar$",
+             ],
+             "label": "Backup/archive file",
+             "severity": "high",
+         },
+         "config_files": {
+             "patterns": [
+                 r"web\.config",
+                 r"\.htaccess",
+                 r"\.htpasswd",
+                 r"\.env$",
+                 r"config\.php",
+                 r"config\.inc",
+                 r"settings\.py",
+                 r"\.ini$",
+                 r"\.conf$",
+                 r"\.cfg$",
+             ],
+             "label": "Configuration file exposed",
+             "severity": "high",
+         },
+         "source_files": {
+             "patterns": [
+                 r"\.git(/|$)",
+                 r"\.svn(/|$)",
+                 r"\.DS_Store",
+                 r"\.vscode(/|$)",
+                 r"\.idea(/|$)",
+                 r"Thumbs\.db",
+                 r"\.log$",
+                 r"debug\.",
+                 r"test\.php",
+                 r"phpinfo",
+             ],
+             "label": "Development/debug file",
+             "severity": "medium",
+         },
+         "legacy_dirs": {
+             "patterns": [
+                 r"_vti_",
+                 r"/cgi-bin(/|$)",
+                 r"/cgi(/|$)",
+                 r"/fcgi(/|$)",
+                 r"/admin(/|$)",
+                 r"/administrator(/|$)",
+                 r"/phpmyadmin(/|$)",
+                 r"/pma(/|$)",
+                 r"/myadmin(/|$)",
+             ],
+             "label": "Legacy/admin directory",
+             "severity": "medium",
+         },
+         "sensitive_endpoints": {
+             "patterns": [
+                 r"/upload(/|$)",
+                 r"/uploads(/|$)",
+                 r"/file(/|$)",
+                 r"/files(/|$)",
+                 r"/tmp(/|$)",
+                 r"/temp(/|$)",
+                 r"/private(/|$)",
+                 r"/internal(/|$)",
+                 r"/api(/|$)",
+                 r"/bank(/|$)",
+             ],
+             "label": "Potentially sensitive directory",
+             "severity": "low",
+         },
+     }
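+     # Example: a discovered URL such as http://TARGET/backup/site.tar.gz
+     # matches r"\.tar" under "backup_files" and is recorded as a
+     # high-severity "Backup/archive file" finding. Within each category the
+     # scan stops at the first matching pattern (see the break statements in
+     # _create_findings_for_sensitive_paths and _identify_security_concerns).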
+ 
+     # High-value directory keywords for auto-chaining
+     HIGH_VALUE_DIR_KEYWORDS = [
+         "mutillidae",
+         "dvwa",
+         "bwapp",
+         "webgoat",
+         "phpmyadmin",
+         "juice",
+         "juice-shop",
+         "hackazon",
+         "pentesterlab",
+         "vulnhub",
+         "api",
+         "rest",
+         "graphql",
+         "drupal",
+         "wordpress",
+         "joomla",
+         "moodle",
+         "magento",
+         "phpbb",
+         "opencart",
+         "prestashop",
+         "zen-cart",
+         "oscommerce",
+         # WordPress directory indicators (trigger wpscan chains)
+         "wp-content",
+         "wp-admin",
+         "wp-includes",
+     ]
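+     # In parse_job these keywords are matched as lowercase substrings against
+     # both the discovered URL and its redirect target; e.g. a 302 from
+     # /dashboard to /wp-admin/ matches "wp-admin", so the redirect target is
+     # queued as a high-value directory for auto-chaining.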
+ 
+     # Files to extract content from when discovered (for chaining)
+     # These files often contain paths that aren't in standard wordlists
+     CONTENT_EXTRACTION_FILES = {
+         "robots.txt": "_extract_robots_paths",
+         "sitemap.xml": "_extract_sitemap_urls",
+     }
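+     # Values name instance methods: _extract_content_from_discovered_files
+     # resolves them via getattr(self, method_name) for any 200 response whose
+     # URL ends with the key, so adding an extractor only requires a new
+     # entry here plus the matching method.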
+ 
+     def parse_job(
+         self,
+         engagement_id: int,
+         log_path: str,
+         job: Dict[str, Any],
+         host_manager: Optional[Any] = None,
+         findings_manager: Optional[Any] = None,
+         credentials_manager: Optional[Any] = None,
+     ) -> Dict[str, Any]:
+         """
+         Parse Gobuster job results.
+ 
+         Extracts discovered paths and stores them in the database.
+         """
+         try:
+             from souleyez.parsers.gobuster_parser import (
+                 parse_gobuster_output,
+                 get_paths_stats,
+             )
+             from souleyez.engine.result_handler import detect_tool_error
+ 
+             # Import managers if not provided
+             if host_manager is None:
+                 from souleyez.storage.hosts import HostManager
+ 
+                 host_manager = HostManager()
+             if findings_manager is None:
+                 from souleyez.storage.findings import FindingsManager
+ 
+                 findings_manager = FindingsManager()
+ 
+             # Read the log file
+             with open(log_path, "r", encoding="utf-8", errors="replace") as f:
+                 log_content = f.read()
+ 
+             # Parse gobuster output
+             target = job.get("target", "")
+             parsed = parse_gobuster_output(log_content, target)
+ 
+             # Get or create host from target URL
+             host_id = None
+             if parsed["target_url"]:
+                 parsed_url = urlparse(parsed["target_url"])
+                 hostname = parsed_url.hostname
+ 
+                 if hostname:
+                     hosts = host_manager.list_hosts(engagement_id)
+                     for host in hosts:
+                         if (
+                             host.get("hostname") == hostname
+                             or host.get("ip_address") == hostname
+                         ):
+                             host_id = host["id"]
+                             break
+ 
+                     if not host_id:
+                         is_ip = re.match(
+                             r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", hostname
+                         )
+                         if is_ip:
+                             result = host_manager.add_or_update_host(
+                                 engagement_id, {"ip": hostname, "status": "up"}
+                             )
+                             if isinstance(result, dict):
+                                 host_id = result.get("id")
+                             else:
+                                 host_id = result
+ 
+             # Store web paths
+             paths_added = 0
+             created_findings = []
+ 
+             if host_id and parsed["paths"]:
+                 try:
+                     from souleyez.storage.web_paths import WebPathsManager
+ 
+                     wpm = WebPathsManager()
+                     paths_added = wpm.bulk_add_web_paths(host_id, parsed["paths"])
+                 except Exception as e:
+                     logger.warning(f"Failed to store web paths: {e}")
+ 
+                 # Create findings for sensitive paths
+                 created_findings = self._create_findings_for_sensitive_paths(
+                     engagement_id, host_id, parsed["paths"], findings_manager
+                 )
+ 
+             stats = get_paths_stats(parsed)
+ 
+             # Extract PHP files for auto-chaining
+             php_files = [
+                 (path.get("url"), path.get("status_code"))
+                 for path in parsed["paths"]
+                 if path.get("url", "").endswith(".php")
+                 and path.get("status_code") in [200, 401, 403]
+             ]
+ 
+             # Extract ASP/ASPX files
+             asp_files = [
+                 (path.get("url"), path.get("status_code"))
+                 for path in parsed["paths"]
+                 if (
+                     path.get("url", "").lower().endswith(".asp")
+                     or path.get("url", "").lower().endswith(".aspx")
+                 )
+                 and path.get("status_code") in [200, 401, 403]
+             ]
+ 
+             # Extract high-value directories from redirects
+             # Include all redirect codes: 301, 302, 303, 307, 308
+             # 302 is common for CMS paths (e.g., /dashboard -> /wp-admin/)
+             high_value_dirs = []
+             redirect_codes = [301, 302, 303, 307, 308]
+             for path in parsed["paths"]:
+                 if path.get("status_code") in redirect_codes and path.get("redirect"):
+                     url = path.get("url", "").lower()
+                     redirect = path.get("redirect", "").lower()
+                     if any(
+                         keyword in url or keyword in redirect
+                         for keyword in self.HIGH_VALUE_DIR_KEYWORDS
+                     ):
+                         high_value_dirs.append(path.get("redirect"))
+ 
+             # Check for wildcard response
+             wildcard_detected = False
+             exclude_length = None
+             if "the server returns a status code that matches" in log_content:
+                 wildcard_detected = True
+                 length_match = re.search(r"\(Length: (\d+)\)", log_content)
+                 if length_match:
+                     exclude_length = length_match.group(1)
+ 
+             # Check for host-level redirect
+             host_redirect_detected = False
+             redirect_target = None
+             if "HOST_REDIRECT_TARGET:" in log_content:
+                 host_redirect_detected = True
+                 redirect_match = re.search(
+                     r"HOST_REDIRECT_TARGET:\s*(\S+)", log_content
+                 )
+                 if redirect_match:
+                     redirect_target = redirect_match.group(1)
+ 
+             # Check for gobuster errors
+             gobuster_error = detect_tool_error(log_content, "gobuster")
+ 
+             # Check for home directory exposure (.bashrc, .profile = web root is a home dir)
+             home_dir_files = [
+                 path.get("url")
+                 for path in parsed["paths"]
+                 if any(
+                     path.get("url", "").endswith(f)
+                     for f in [".bashrc", ".profile", ".bash_profile", ".zshrc"]
+                 )
+                 and path.get("status_code") == 200
+             ]
+             home_directory_exposure = len(home_dir_files) > 0
+             # Determine status
+             if gobuster_error:
+                 status = STATUS_ERROR
+             elif host_redirect_detected:
+                 status = STATUS_WARNING
+             elif wildcard_detected:
+                 status = STATUS_WARNING
+             elif stats["total"] > 0:
+                 status = STATUS_DONE
+             else:
+                 status = STATUS_NO_RESULTS
+ 
+             # === CONTENT EXTRACTION (safe/additive) ===
+             # Extract paths from robots.txt, sitemap.xml if found
+             # This is wrapped in try/except - failures don't affect existing results
+             extracted_content = {"extracted_paths": [], "extraction_sources": []}
+             try:
+                 if parsed["paths"] and parsed.get("target_url"):
+                     extracted_content = self._extract_content_from_discovered_files(
+                         parsed["paths"], parsed["target_url"]
+                     )
+             except Exception as e:
+                 # Log but never fail the job
+                 logger.debug(f"Content extraction skipped: {e}")
+ 
+             # Build summary for job queue display
+             summary_parts = []
+             if stats["total"] > 0:
+                 summary_parts.append(f"{stats['total']} path(s)")
+             if len(created_findings) > 0:
+                 summary_parts.append(f"{len(created_findings)} finding(s)")
+             if wildcard_detected:
+                 summary_parts.append("wildcard")
+             summary = " | ".join(summary_parts) if summary_parts else "No paths found"
+ 
+             result = {
+                 "tool": "gobuster",
+                 "status": status,
+                 "summary": summary,
+                 "paths_added": paths_added,
+                 "total_paths": stats["total"],
+                 "paths_found": stats["total"],
+                 "redirects_found": stats.get("redirects", 0),
+                 "by_status": stats["by_status"],
+                 "target_url": parsed.get("target_url"),
+                 "findings": created_findings,
+                 "php_files": php_files,
+                 "asp_files": asp_files,
+                 "high_value_dirs": high_value_dirs,
+                 "home_directory_exposure": home_directory_exposure,
+                 # New: extracted paths from robots.txt/sitemap.xml for chaining
+                 "extracted_paths": extracted_content.get("extracted_paths", []),
+                 "extraction_sources": extracted_content.get("extraction_sources", []),
+             }
+ 
+             if wildcard_detected:
+                 result["wildcard_detected"] = True
+                 if exclude_length:
+                     result["exclude_length"] = exclude_length
+ 
+             if host_redirect_detected:
+                 result["host_redirect_detected"] = True
+                 if redirect_target:
+                     result["redirect_target"] = redirect_target
+ 
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Error parsing gobuster job: {e}")
+             return {"error": str(e)}
+ 
+     def _create_findings_for_sensitive_paths(
+         self, engagement_id: int, host_id: int, paths: List[Dict], findings_manager: Any
+     ) -> List[Dict]:
+         """Create findings for sensitive/interesting paths discovered."""
+         created_findings = []
+ 
+         for path_entry in paths:
+             url = path_entry.get("url", "").lower()
+             for concern_type, concern_info in self.SECURITY_CONCERN_PATTERNS.items():
+                 for pattern in concern_info["patterns"]:
+                     if re.search(pattern, url, re.IGNORECASE):
+                         try:
+                             findings_manager.add_finding(
+                                 engagement_id=engagement_id,
+                                 host_id=host_id,
+                                 title=f"{concern_info['label']}: {path_entry.get('path', '')}",
+                                 finding_type="web_path",
+                                 severity=concern_info["severity"],
+                                 description=f"Gobuster discovered a potentially sensitive path: {path_entry.get('url', '')}\n"
+                                 f"Status code: {path_entry.get('status_code', 'unknown')}\n"
+                                 f"Category: {concern_info['label']}",
+                                 tool="gobuster",
+                             )
+                             created_findings.append(
+                                 {
+                                     "url": path_entry.get("url"),
+                                     "title": f"{concern_info['label']}: {path_entry.get('path', '')}",
+                                     "type": concern_type,
+                                     "severity": concern_info["severity"],
+                                 }
+                             )
+                         except Exception as e:
+                             logger.warning(f"Failed to create finding: {e}")
+                         break
+ 
+         return created_findings
+ 
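+     # Display-only counterpart to the method above: it applies the same
+     # SECURITY_CONCERN_PATTERNS but returns a list for rendering and never
+     # writes findings, so display_done can re-run it on parsed logs safely.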
+     def _identify_security_concerns(self, paths: List[Dict]) -> List[Dict]:
+         """Identify security concerns in discovered paths."""
+         concerns = []
+ 
+         for path_entry in paths:
+             url = path_entry.get("url", "").lower()
+             for concern_type, concern_info in self.SECURITY_CONCERN_PATTERNS.items():
+                 for pattern in concern_info["patterns"]:
+                     if re.search(pattern, url, re.IGNORECASE):
+                         concerns.append(
+                             {
+                                 "url": path_entry.get("url", ""),
+                                 "type": concern_type,
+                                 "label": concern_info["label"],
+                                 "severity": concern_info["severity"],
+                                 "status_code": path_entry.get("status_code", "unknown"),
+                             }
+                         )
+                         break
+ 
+         return concerns
+ 
+     def _extract_content_from_discovered_files(
+         self,
+         paths: List[Dict],
+         base_url: str,
+     ) -> Dict[str, Any]:
+         """
+         Extract additional paths/URLs from discovered files like robots.txt and sitemap.xml.
+ 
+         This is wrapped in comprehensive error handling - if ANYTHING fails,
+         we return empty results and let the normal flow continue.
+ 
+         Args:
+             paths: List of discovered paths from gobuster
+             base_url: Base URL of the target
+ 
+         Returns:
+             Dict with extracted_paths list and metadata
+         """
+         result = {
+             "extracted_paths": [],
+             "extraction_sources": [],
+             "extraction_errors": [],
+         }
+ 
+         try:
+             # Find extractable files in discovered paths
+             for path_entry in paths:
+                 url = path_entry.get("url", "")
+                 status = path_entry.get("status_code")
+ 
+                 # Only process 200 OK responses
+                 if status != 200:
+                     continue
+ 
+                 # Check if this is an extractable file
+                 for filename, extractor_method in self.CONTENT_EXTRACTION_FILES.items():
+                     if url.lower().endswith(filename):
+                         try:
+                             extractor = getattr(self, extractor_method)
+                             extracted = extractor(url, base_url)
+                             if extracted:
+                                 result["extracted_paths"].extend(extracted)
+                                 result["extraction_sources"].append(
+                                     {
+                                         "file": filename,
+                                         "url": url,
+                                         "paths_found": len(extracted),
+                                     }
+                                 )
+                                 logger.info(
+                                     f"Extracted {len(extracted)} paths from {url}"
+                                 )
+                         except Exception as e:
+                             # Log but don't fail - this is purely additive
+                             logger.warning(f"Failed to extract from {url}: {e}")
+                             result["extraction_errors"].append(
+                                 {
+                                     "file": filename,
+                                     "url": url,
+                                     "error": str(e),
+                                 }
+                             )
+                         break
+ 
+             # Deduplicate extracted paths
+             seen = set()
+             unique_paths = []
+             for path in result["extracted_paths"]:
+                 if path not in seen:
+                     seen.add(path)
+                     unique_paths.append(path)
+             result["extracted_paths"] = unique_paths
+ 
+         except Exception as e:
+             # Catch-all: if anything unexpected happens, log and return empty
+             logger.warning(f"Content extraction failed (non-fatal): {e}")
+             result["extraction_errors"].append({"error": str(e)})
+ 
+         return result
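+     # Illustrative return shape for a target exposing robots.txt:
+     #   {"extracted_paths": ["http://TARGET/admin/", ...],
+     #    "extraction_sources": [{"file": "robots.txt",
+     #                            "url": "http://TARGET/robots.txt",
+     #                            "paths_found": 3}],
+     #    "extraction_errors": []}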
+ 
+     def _extract_robots_paths(self, robots_url: str, base_url: str) -> List[str]:
+         """
+         Fetch and parse robots.txt to extract Disallow/Allow paths.
+ 
+         Args:
+             robots_url: Full URL to robots.txt
+             base_url: Base URL for constructing full paths
+ 
+         Returns:
+             List of full URLs to scan
+         """
+         paths = []
+ 
+         try:
+             # Validate URL scheme (security: prevent file:// and other schemes)
+             parsed = urlparse(robots_url)
+             if parsed.scheme.lower() not in ("http", "https"):
+                 logger.debug(
+                     f"Skipping robots.txt fetch - invalid scheme: {parsed.scheme}"
+                 )
+                 return paths
+ 
+             # Create SSL context that ignores cert errors (for self-signed)
+             ctx = ssl.create_default_context()
+             ctx.check_hostname = False
+             ctx.verify_mode = ssl.CERT_NONE
+ 
+             req = urllib.request.Request(
+                 robots_url,
+                 headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
+             )
+ 
+             with urllib.request.urlopen(
+                 req, timeout=10, context=ctx
+             ) as response:  # nosec B310
+                 content = response.read().decode("utf-8", errors="replace")
+ 
+             # Known robots.txt directives to skip
+             known_directives = [
+                 "user-agent:",
+                 "disallow:",
+                 "allow:",
+                 "sitemap:",
+                 "crawl-delay:",
+                 "host:",
+                 "request-rate:",
+             ]
+ 
+             # Parse robots.txt format
+             for line in content.split("\n"):
+                 line = line.strip()
+ 
+                 # Skip comments and empty lines
+                 if not line or line.startswith("#"):
+                     continue
+ 
+                 line_lower = line.lower()
+ 
+                 # Extract Disallow and Allow paths (standard format)
+                 if line_lower.startswith("disallow:") or line_lower.startswith(
+                     "allow:"
+                 ):
+                     _, _, path = line.partition(":")
+                     path = path.strip()
+ 
+                     # Skip empty paths, wildcards, and query strings
+                     if not path or path == "/" or "*" in path or "?" in path:
+                         continue
+ 
+                     # Build full URL
+                     full_url = urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
+ 
+                     if full_url not in paths:
+                         paths.append(full_url)
+ 
+                 # Extract bare paths (CTF-style hints like "key-1-of-3.txt")
+                 # These are non-standard but common in CTFs and some configs
+                 # Only extract if it looks like a file (has extension) to avoid garbage
+                 elif not any(line_lower.startswith(d) for d in known_directives):
+                     path = line.strip()
+                     # Must look like a file with extension (1-5 char extension)
+                     # Examples: key-1-of-3.txt, fsocity.dic, backup.sql
+                     if path and re.match(r"^[\w\-./]+\.\w{1,5}$", path):
+                         # Build full URL
+                         full_url = urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
+ 
+                         if full_url not in paths:
+                             paths.append(full_url)
+                             logger.debug(f"Extracted bare path from robots.txt: {path}")
+ 
+             logger.debug(f"Extracted {len(paths)} paths from robots.txt")
+ 
+         except Exception as e:
+             logger.debug(f"Failed to fetch/parse robots.txt: {e}")
+             # Don't raise - just return empty list
+ 
+         return paths
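+     # Worked example (illustrative) of what the parser keeps from robots.txt:
+     #   User-agent: *        -> skipped (known directive)
+     #   Disallow: /admin/    -> http://TARGET/admin/
+     #   Disallow: /*.php     -> skipped (wildcard in path)
+     #   key-1-of-3.txt       -> http://TARGET/key-1-of-3.txt (bare-path hint)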
+ 
+     def _extract_sitemap_urls(self, sitemap_url: str, base_url: str) -> List[str]:
+         """
+         Fetch and parse sitemap.xml to extract URLs.
+ 
+         Args:
+             sitemap_url: Full URL to sitemap.xml
+             base_url: Base URL (not used but kept for consistent interface)
+ 
+         Returns:
+             List of URLs from sitemap
+         """
+         urls = []
+ 
+         try:
+             # Validate URL scheme (security: prevent file:// and other schemes)
+             parsed = urlparse(sitemap_url)
+             if parsed.scheme.lower() not in ("http", "https"):
+                 logger.debug(
+                     f"Skipping sitemap.xml fetch - invalid scheme: {parsed.scheme}"
+                 )
+                 return urls
+ 
+             # Create SSL context that ignores cert errors (for self-signed)
+             ctx = ssl.create_default_context()
+             ctx.check_hostname = False
+             ctx.verify_mode = ssl.CERT_NONE
+ 
+             req = urllib.request.Request(
+                 sitemap_url,
+                 headers={"User-Agent": "Mozilla/5.0 (compatible; SoulEyez/1.0)"},
+             )
+ 
+             with urllib.request.urlopen(
+                 req, timeout=10, context=ctx
+             ) as response:  # nosec B310
+                 content = response.read().decode("utf-8", errors="replace")
+ 
+             # Parse XML
+             try:
+                 root = ElementTree.fromstring(content)
+ 
+                 # Handle namespace (sitemaps usually have xmlns)
+                 ns = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}
+ 
+                 # Try with namespace first
+                 for loc in root.findall(".//sm:loc", ns):
+                     if loc.text:
+                         urls.append(loc.text.strip())
+ 
+                 # If no results, try without namespace
+                 if not urls:
+                     for loc in root.findall(".//loc"):
+                         if loc.text:
+                             urls.append(loc.text.strip())
+ 
+             except ElementTree.ParseError:
+                 # Not valid XML - try regex fallback
+                 loc_matches = re.findall(r"<loc>([^<]+)</loc>", content)
+                 urls.extend(loc_matches)
+ 
+             logger.debug(f"Extracted {len(urls)} URLs from sitemap.xml")
+ 
+         except Exception as e:
+             logger.debug(f"Failed to fetch/parse sitemap.xml: {e}")
+             # Don't raise - just return empty list
+ 
+         return urls
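+     # A minimal sitemap the XML branch handles (illustrative):
+     #   <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+     #     <url><loc>http://TARGET/hidden/report.html</loc></url>
+     #   </urlset>
+     # The namespaced findall picks up <loc> entries; if the document is not
+     # well-formed XML, the regex fallback still recovers <loc> values.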
+ 
+     def display_done(
+         self,
+         job: Dict[str, Any],
+         log_path: str,
+         show_all: bool = False,
+         show_passwords: bool = False,
+     ) -> None:
+         """Display successful Gobuster scan results."""
+         try:
+             from souleyez.parsers.gobuster_parser import parse_gobuster_output
+ 
+             if not log_path or not os.path.exists(log_path):
+                 return
+ 
+             with open(log_path, "r", encoding="utf-8", errors="replace") as f:
+                 log_content = f.read()
+ 
+             parsed = parse_gobuster_output(log_content, job.get("target", ""))
+             paths = parsed.get("paths", [])
+ 
+             if not paths:
+                 self.display_no_results(job, log_path)
+                 return
+ 
+             # Identify security concerns
+             security_concerns = self._identify_security_concerns(paths)
+ 
+             # Display security concerns if found
+             if security_concerns:
+                 click.echo(click.style("=" * 70, fg="red"))
+                 click.echo(click.style("SECURITY CONCERNS", bold=True, fg="red"))
+                 click.echo(click.style("=" * 70, fg="red"))
+                 click.echo()
+ 
+                 high_concerns = [
+                     c for c in security_concerns if c["severity"] == "high"
+                 ]
+                 medium_concerns = [
+                     c for c in security_concerns if c["severity"] == "medium"
+                 ]
+                 low_concerns = [c for c in security_concerns if c["severity"] == "low"]
+ 
+                 if high_concerns:
+                     click.echo(
+                         click.style("[HIGH] Critical findings:", fg="red", bold=True)
+                     )
+                     by_label = {}
+                     for c in high_concerns:
+                         if c["label"] not in by_label:
+                             by_label[c["label"]] = []
+                         by_label[c["label"]].append(c["url"])
+                     for label, urls in by_label.items():
+                         click.echo(click.style(f"  - {label}:", fg="red"))
+                         for url in urls[:5]:
+                             click.echo(f"      {url}")
+                         if len(urls) > 5:
+                             click.echo(f"      ... and {len(urls) - 5} more")
+                     click.echo()
+ 
+                 if medium_concerns:
+                     click.echo(
+                         click.style(
+                             "[MEDIUM] Notable findings:", fg="yellow", bold=True
+                         )
+                     )
+                     by_label = {}
+                     for c in medium_concerns:
+                         if c["label"] not in by_label:
+                             by_label[c["label"]] = []
+                         by_label[c["label"]].append(c["url"])
+                     for label, urls in by_label.items():
+                         click.echo(click.style(f"  - {label}:", fg="yellow"))
+                         for url in urls[:5]:
+                             click.echo(f"      {url}")
+                         if len(urls) > 5:
+                             click.echo(f"      ... and {len(urls) - 5} more")
+                     click.echo()
+ 
+                 if low_concerns:
+                     click.echo(
+                         click.style("[LOW] Worth investigating:", fg="cyan", bold=True)
+                     )
+                     by_label = {}
+                     for c in low_concerns:
+                         if c["label"] not in by_label:
+                             by_label[c["label"]] = []
+                         by_label[c["label"]].append(c["url"])
+                     for label, urls in by_label.items():
+                         click.echo(f"  - {label}: {len(urls)} path(s)")
+                     click.echo()
+ 
+             # Display discovered paths
+             click.echo(click.style("=" * 70, fg="cyan"))
+             click.echo(click.style("DISCOVERED WEB PATHS", bold=True, fg="cyan"))
+             click.echo(click.style("=" * 70, fg="cyan"))
+             click.echo()
+             click.echo(f"Total found: {len(paths)}")
+             click.echo()
+ 
+             # Group by status code
+             status_groups = {}
+             for path in paths:
+                 status = path.get("status_code", "unknown")
+                 if status not in status_groups:
+                     status_groups[status] = []
+                 status_groups[status].append(path)
+ 
+             # Display by status code
+             for status in sorted(status_groups.keys()):
+                 status_color = (
+                     "green"
+                     if status == 200
+                     else "cyan" if status in [301, 302] else "yellow"
+                 )
+                 click.echo(
+                     click.style(
+                         f"[{status}] ({len(status_groups[status])} paths)",
+                         bold=True,
+                         fg=status_color,
+                     )
+                 )
+ 
+                 paths_to_show = (
+                     status_groups[status] if show_all else status_groups[status][:10]
+                 )
+ 
+                 for path in paths_to_show:
+                     url = path.get("url", "")
+                     size = path.get("size", "")
+                     redirect = path.get("redirect", "")
+ 
+                     if redirect:
+                         click.echo(f"  {url} -> {redirect}")
+                     elif size:
+                         click.echo(f"  {url} ({size} bytes)")
+                     else:
+                         click.echo(f"  {url}")
+ 
+                 if not show_all and len(status_groups[status]) > 10:
+                     click.echo(f"  ... and {len(status_groups[status]) - 10} more")
+                 click.echo()
+ 
+             # Show extracted paths from robots.txt/sitemap.xml if any
+             try:
+                 target_url = parsed.get("target_url") or job.get("target", "")
+                 if target_url and paths:
+                     extracted = self._extract_content_from_discovered_files(
+                         paths, target_url
+                     )
+                     extracted_paths = extracted.get("extracted_paths", [])
+                     extraction_sources = extracted.get("extraction_sources", [])
+ 
+                     if extracted_paths:
+                         # Format source names from dict list
+                         source_names = [
+                             s.get("file", "unknown") for s in extraction_sources
+                         ]
+ 
+                         click.echo(click.style("=" * 70, fg="magenta"))
+                         click.echo(
+                             click.style(
+                                 "EXTRACTED PATHS (from robots.txt/sitemap.xml)",
+                                 bold=True,
+                                 fg="magenta",
+                             )
+                         )
+                         click.echo(click.style("=" * 70, fg="magenta"))
+                         click.echo()
+                         click.echo(f"Sources: {', '.join(source_names)}")
+                         click.echo(f"Paths found: {len(extracted_paths)}")
+                         click.echo()
+                         click.echo(
+                             click.style(
+                                 "These paths triggered follow-up gobuster scans:",
+                                 fg="magenta",
+                             )
+                         )
+                         for ep in extracted_paths[:10]:
+                             click.echo(f"  {ep}")
+                         if len(extracted_paths) > 10:
+                             click.echo(f"  ... and {len(extracted_paths) - 10} more")
+                         click.echo()
+             except Exception as e:
+                 logger.debug(f"Extraction display failed: {e}")  # Log for debugging
+ 
+             # Display next steps suggestions
+             try:
+                 from souleyez.parsers.gobuster_parser import generate_next_steps
+ 
+                 next_steps = generate_next_steps(parsed)
+                 if next_steps:
+                     click.echo(click.style("=" * 70, fg="green"))
+                     click.echo(
+                         click.style("SUGGESTED NEXT STEPS", bold=True, fg="green")
+                     )
+                     click.echo(click.style("=" * 70, fg="green"))
+                     click.echo()
+                     for i, step in enumerate(next_steps[:5], 1):
+                         click.echo(click.style(f"{i}. {step['title']}", bold=True))
+                         click.echo(click.style(f"   Why: {step['reason']}", fg="white"))
+                         for cmd in step.get("commands", [])[:2]:
+                             click.echo(click.style(f"   $ {cmd}", fg="cyan"))
+                         click.echo()
+                     if len(next_steps) > 5:
+                         click.echo(f"  ... and {len(next_steps) - 5} more suggestions")
+                     click.echo()
+             except Exception as e:
+                 logger.debug(f"Next steps display failed: {e}")
+ 
+             click.echo(click.style("=" * 70, fg="cyan"))
+             click.echo()
+ 
+         except Exception as e:
+             logger.debug(f"Error in display_done: {e}")
+ 
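+     # display_warning distinguishes the two warning causes parse_job records:
+     # a host-level redirect (HOST_REDIRECT_TARGET marker in the log) and a
+     # wildcard response; anything else falls back to a generic
+     # "check raw logs" message.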
+     def display_warning(
+         self,
+         job: Dict[str, Any],
+         log_path: str,
+         log_content: Optional[str] = None,
+     ) -> None:
+         """Display warning status for Gobuster scan."""
+         # Read log if not provided
+         if log_content is None and log_path and os.path.exists(log_path):
+             try:
+                 with open(log_path, "r", encoding="utf-8", errors="replace") as f:
+                     log_content = f.read()
+             except Exception:
+                 log_content = ""
+ 
+         click.echo(click.style("=" * 70, fg="yellow"))
+         click.echo(click.style("[WARNING] GOBUSTER SCAN", bold=True, fg="yellow"))
+         click.echo(click.style("=" * 70, fg="yellow"))
+         click.echo()
+ 
+         # Check for host redirect
+         if log_content and "HOST_REDIRECT_TARGET:" in log_content:
+             redirect_match = re.search(r"HOST_REDIRECT_TARGET:\s*(\S+)", log_content)
+             if redirect_match:
+                 redirect_target = redirect_match.group(1)
+                 click.echo(click.style("Host-Level Redirect Detected", bold=True))
+                 click.echo(f"  - Original target: {job.get('target', 'unknown')}")
+                 click.echo(f"  - Redirects to: {redirect_target}")
+                 click.echo()
+                 click.echo("  The server redirects ALL requests to a different host.")
+                 click.echo(
+                     "  Results are unreliable due to variable redirect response sizes."
+                 )
+                 click.echo()
+                 click.echo(
+                     click.style(
+                         "  A retry job was auto-queued with the correct target.",
+                         fg="green",
+                     )
+                 )
+                 click.echo()
+ 
+         # Check for wildcard response
+         elif log_content and (
+             "wildcard" in log_content.lower()
+             or "the server returns a status code that matches" in log_content.lower()
+         ):
+             click.echo(click.style("Wildcard Response Detected", bold=True))
+             click.echo("  The server returns the same response for ALL URLs.")
+             click.echo("  Gobuster cannot differentiate real vs fake paths.")
+             click.echo()
+             length_match = re.search(r"Length:\s*(\d+)", log_content)
+             if length_match:
+                 click.echo(f"  - Response length: {length_match.group(1)} bytes")
+                 click.echo()
+             click.echo(
+                 click.style(
+                     "  A retry job was auto-queued with --exclude-length.",
+                     fg="green",
+                 )
+             )
+             click.echo()
+ 
+         else:
+             click.echo("  Scan completed with warnings. Check raw logs for details.")
+             click.echo("  Press [r] to view raw logs.")
+             click.echo()
+ 
+         click.echo(click.style("=" * 70, fg="yellow"))
+         click.echo()
+ 
+     def display_error(
+         self,
+         job: Dict[str, Any],
+         log_path: str,
+         log_content: Optional[str] = None,
+     ) -> None:
+         """Display error status for Gobuster scan."""
+         # Read log if not provided
+         if log_content is None and log_path and os.path.exists(log_path):
+             try:
+                 with open(log_path, "r", encoding="utf-8", errors="replace") as f:
+                     log_content = f.read()
+             except Exception:
+                 log_content = ""
+ 
+         click.echo(click.style("=" * 70, fg="red"))
+         click.echo(click.style("[ERROR] GOBUSTER SCAN FAILED", bold=True, fg="red"))
+         click.echo(click.style("=" * 70, fg="red"))
+         click.echo()
+ 
+         # Check if it was a timeout
+         if log_content and (
+             "timed out" in log_content.lower() or "Command timed out" in log_content
+         ):
+             click.echo("  Scan reached timeout before completing.")
+             click.echo()
+             click.echo(click.style("  Possible causes:", fg="bright_black"))
+             click.echo(
+                 click.style("  - Target is rate limiting requests", fg="bright_black")
+             )
+             click.echo(
+                 click.style(
+                     "  - Wordlist too large for timeout window", fg="bright_black"
+                 )
+             )
+             click.echo(click.style("  - Network latency issues", fg="bright_black"))
+             click.echo()
+             click.echo(click.style("  Suggestions:", fg="bright_black"))
+             click.echo(click.style("  - Try smaller wordlist", fg="bright_black"))
+             click.echo(
+                 click.style(
+                     "  - Increase --delay between requests", fg="bright_black"
+                 )
+             )
+             click.echo(click.style("  - Reduce threads with -t", fg="bright_black"))
+         else:
+             error_msg = None
+             if log_content and "ERROR:" in log_content:
+                 match = re.search(r"ERROR:\s*(.+?)(?:\n|$)", log_content)
+                 if match:
+                     error_msg = match.group(1).strip()
+ 
+             if error_msg:
+                 click.echo(f"  Error: {error_msg}")
+             else:
+                 click.echo("  Scan failed - see raw logs for details.")
+                 click.echo("  Press [r] to view raw logs.")
+ 
+         click.echo()
+         click.echo(click.style("=" * 70, fg="red"))
+         click.echo()
+ 
+     def display_no_results(
+         self,
+         job: Dict[str, Any],
+         log_path: str,
+     ) -> None:
+         """Display no_results status for Gobuster scan."""
+         click.echo(click.style("=" * 70, fg="cyan"))
+         click.echo(click.style("GOBUSTER SCAN RESULTS", bold=True, fg="cyan"))
+         click.echo(click.style("=" * 70, fg="cyan"))
+         click.echo()
+         click.echo("  No paths discovered.")
+         click.echo()
+ 
+         # Extract wordlist name from args
+         args = job.get("args", [])
+         for i, arg in enumerate(args):
+             if arg == "-w" and i + 1 < len(args):
+                 wordlist = os.path.basename(args[i + 1])
+                 click.echo(f"  Wordlist: {wordlist}")
+                 break
+ 
+         # Extract extensions
+         for i, arg in enumerate(args):
+             if arg == "-x" and i + 1 < len(args):
+                 click.echo(f"  Extensions: {args[i + 1]}")
+                 break
+ 
+         click.echo()
+         click.echo(click.style("  This could mean:", fg="bright_black"))
+         click.echo(
+             click.style(
+                 "  - Target has good security (no exposed paths)", fg="bright_black"
+             )
+         )
+         click.echo(
+             click.style("  - Try a different/larger wordlist", fg="bright_black")
+         )
+         click.echo(
+             click.style(
+                 "  - Target may be blocking automated requests", fg="bright_black"
+             )
+         )
+         click.echo()
+         click.echo(click.style("=" * 70, fg="cyan"))
+         click.echo()