souleyez 2.43.28__py3-none-any.whl → 2.43.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356) hide show
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9592 -2879
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1238 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2198 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +563 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +408 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +371 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +288 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/whois_handler.py +277 -0
  126. souleyez/handlers/wpscan_handler.py +554 -0
  127. souleyez/history.py +32 -16
  128. souleyez/importers/msf_importer.py +106 -75
  129. souleyez/importers/smart_importer.py +208 -147
  130. souleyez/integrations/siem/__init__.py +10 -10
  131. souleyez/integrations/siem/base.py +17 -18
  132. souleyez/integrations/siem/elastic.py +108 -122
  133. souleyez/integrations/siem/factory.py +207 -80
  134. souleyez/integrations/siem/googlesecops.py +146 -154
  135. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  136. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  137. souleyez/integrations/siem/sentinel.py +107 -109
  138. souleyez/integrations/siem/splunk.py +246 -212
  139. souleyez/integrations/siem/wazuh.py +65 -71
  140. souleyez/integrations/wazuh/__init__.py +5 -5
  141. souleyez/integrations/wazuh/client.py +70 -93
  142. souleyez/integrations/wazuh/config.py +85 -57
  143. souleyez/integrations/wazuh/host_mapper.py +28 -36
  144. souleyez/integrations/wazuh/sync.py +78 -68
  145. souleyez/intelligence/__init__.py +4 -5
  146. souleyez/intelligence/correlation_analyzer.py +309 -295
  147. souleyez/intelligence/exploit_knowledge.py +661 -623
  148. souleyez/intelligence/exploit_suggestions.py +159 -139
  149. souleyez/intelligence/gap_analyzer.py +132 -97
  150. souleyez/intelligence/gap_detector.py +251 -214
  151. souleyez/intelligence/sensitive_tables.py +266 -129
  152. souleyez/intelligence/service_parser.py +137 -123
  153. souleyez/intelligence/surface_analyzer.py +407 -268
  154. souleyez/intelligence/target_parser.py +159 -162
  155. souleyez/licensing/__init__.py +6 -6
  156. souleyez/licensing/validator.py +17 -19
  157. souleyez/log_config.py +79 -54
  158. souleyez/main.py +1505 -687
  159. souleyez/migrations/fix_job_counter.py +16 -14
  160. souleyez/parsers/bloodhound_parser.py +41 -39
  161. souleyez/parsers/crackmapexec_parser.py +178 -111
  162. souleyez/parsers/dalfox_parser.py +72 -77
  163. souleyez/parsers/dnsrecon_parser.py +103 -91
  164. souleyez/parsers/enum4linux_parser.py +183 -153
  165. souleyez/parsers/ffuf_parser.py +29 -25
  166. souleyez/parsers/gobuster_parser.py +301 -41
  167. souleyez/parsers/hashcat_parser.py +324 -79
  168. souleyez/parsers/http_fingerprint_parser.py +350 -103
  169. souleyez/parsers/hydra_parser.py +131 -111
  170. souleyez/parsers/impacket_parser.py +231 -178
  171. souleyez/parsers/john_parser.py +98 -86
  172. souleyez/parsers/katana_parser.py +316 -0
  173. souleyez/parsers/msf_parser.py +943 -498
  174. souleyez/parsers/nikto_parser.py +346 -65
  175. souleyez/parsers/nmap_parser.py +262 -174
  176. souleyez/parsers/nuclei_parser.py +40 -44
  177. souleyez/parsers/responder_parser.py +26 -26
  178. souleyez/parsers/searchsploit_parser.py +74 -74
  179. souleyez/parsers/service_explorer_parser.py +279 -0
  180. souleyez/parsers/smbmap_parser.py +180 -124
  181. souleyez/parsers/sqlmap_parser.py +434 -308
  182. souleyez/parsers/theharvester_parser.py +75 -57
  183. souleyez/parsers/whois_parser.py +135 -94
  184. souleyez/parsers/wpscan_parser.py +278 -190
  185. souleyez/plugins/afp.py +44 -36
  186. souleyez/plugins/afp_brute.py +114 -46
  187. souleyez/plugins/ard.py +48 -37
  188. souleyez/plugins/bloodhound.py +95 -61
  189. souleyez/plugins/certipy.py +303 -0
  190. souleyez/plugins/crackmapexec.py +186 -85
  191. souleyez/plugins/dalfox.py +120 -59
  192. souleyez/plugins/dns_hijack.py +146 -41
  193. souleyez/plugins/dnsrecon.py +97 -61
  194. souleyez/plugins/enum4linux.py +91 -66
  195. souleyez/plugins/evil_winrm.py +291 -0
  196. souleyez/plugins/ffuf.py +166 -90
  197. souleyez/plugins/firmware_extract.py +133 -29
  198. souleyez/plugins/gobuster.py +387 -190
  199. souleyez/plugins/gpp_extract.py +393 -0
  200. souleyez/plugins/hashcat.py +100 -73
  201. souleyez/plugins/http_fingerprint.py +854 -267
  202. souleyez/plugins/hydra.py +566 -200
  203. souleyez/plugins/impacket_getnpusers.py +117 -69
  204. souleyez/plugins/impacket_psexec.py +84 -64
  205. souleyez/plugins/impacket_secretsdump.py +103 -69
  206. souleyez/plugins/impacket_smbclient.py +89 -75
  207. souleyez/plugins/john.py +86 -69
  208. souleyez/plugins/katana.py +313 -0
  209. souleyez/plugins/kerbrute.py +237 -0
  210. souleyez/plugins/lfi_extract.py +541 -0
  211. souleyez/plugins/macos_ssh.py +117 -48
  212. souleyez/plugins/mdns.py +35 -30
  213. souleyez/plugins/msf_auxiliary.py +253 -130
  214. souleyez/plugins/msf_exploit.py +239 -161
  215. souleyez/plugins/nikto.py +134 -78
  216. souleyez/plugins/nmap.py +275 -91
  217. souleyez/plugins/nuclei.py +180 -89
  218. souleyez/plugins/nxc.py +285 -0
  219. souleyez/plugins/plugin_base.py +35 -36
  220. souleyez/plugins/plugin_template.py +13 -5
  221. souleyez/plugins/rdp_sec_check.py +130 -0
  222. souleyez/plugins/responder.py +112 -71
  223. souleyez/plugins/router_http_brute.py +76 -65
  224. souleyez/plugins/router_ssh_brute.py +118 -41
  225. souleyez/plugins/router_telnet_brute.py +124 -42
  226. souleyez/plugins/routersploit.py +91 -59
  227. souleyez/plugins/routersploit_exploit.py +77 -55
  228. souleyez/plugins/searchsploit.py +91 -77
  229. souleyez/plugins/service_explorer.py +1160 -0
  230. souleyez/plugins/smbmap.py +122 -72
  231. souleyez/plugins/smbpasswd.py +215 -0
  232. souleyez/plugins/sqlmap.py +301 -113
  233. souleyez/plugins/theharvester.py +127 -75
  234. souleyez/plugins/tr069.py +79 -57
  235. souleyez/plugins/upnp.py +65 -47
  236. souleyez/plugins/upnp_abuse.py +73 -55
  237. souleyez/plugins/vnc_access.py +129 -42
  238. souleyez/plugins/vnc_brute.py +109 -38
  239. souleyez/plugins/whois.py +77 -58
  240. souleyez/plugins/wpscan.py +173 -69
  241. souleyez/reporting/__init__.py +2 -1
  242. souleyez/reporting/attack_chain.py +411 -346
  243. souleyez/reporting/charts.py +436 -501
  244. souleyez/reporting/compliance_mappings.py +334 -201
  245. souleyez/reporting/detection_report.py +126 -125
  246. souleyez/reporting/formatters.py +828 -591
  247. souleyez/reporting/generator.py +386 -302
  248. souleyez/reporting/metrics.py +72 -75
  249. souleyez/scanner.py +35 -29
  250. souleyez/security/__init__.py +37 -11
  251. souleyez/security/scope_validator.py +175 -106
  252. souleyez/security/validation.py +223 -149
  253. souleyez/security.py +22 -6
  254. souleyez/storage/credentials.py +247 -186
  255. souleyez/storage/crypto.py +296 -129
  256. souleyez/storage/database.py +73 -50
  257. souleyez/storage/db.py +58 -36
  258. souleyez/storage/deliverable_evidence.py +177 -128
  259. souleyez/storage/deliverable_exporter.py +282 -246
  260. souleyez/storage/deliverable_templates.py +134 -116
  261. souleyez/storage/deliverables.py +135 -130
  262. souleyez/storage/engagements.py +109 -56
  263. souleyez/storage/evidence.py +181 -152
  264. souleyez/storage/execution_log.py +31 -17
  265. souleyez/storage/exploit_attempts.py +93 -57
  266. souleyez/storage/exploits.py +67 -36
  267. souleyez/storage/findings.py +48 -61
  268. souleyez/storage/hosts.py +176 -144
  269. souleyez/storage/migrate_to_engagements.py +43 -19
  270. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  271. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  272. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  273. souleyez/storage/migrations/_005_screenshots.py +13 -5
  274. souleyez/storage/migrations/_006_deliverables.py +13 -5
  275. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  276. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  277. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  278. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  279. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  280. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  281. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  282. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  283. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  284. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  285. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  286. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  287. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  288. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  289. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  290. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  291. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  292. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  293. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  294. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  295. souleyez/storage/migrations/__init__.py +26 -26
  296. souleyez/storage/migrations/migration_manager.py +19 -19
  297. souleyez/storage/msf_sessions.py +100 -65
  298. souleyez/storage/osint.py +17 -24
  299. souleyez/storage/recommendation_engine.py +269 -235
  300. souleyez/storage/screenshots.py +33 -32
  301. souleyez/storage/smb_shares.py +136 -92
  302. souleyez/storage/sqlmap_data.py +183 -128
  303. souleyez/storage/team_collaboration.py +135 -141
  304. souleyez/storage/timeline_tracker.py +122 -94
  305. souleyez/storage/wazuh_vulns.py +64 -66
  306. souleyez/storage/web_paths.py +33 -37
  307. souleyez/testing/credential_tester.py +221 -205
  308. souleyez/ui/__init__.py +1 -1
  309. souleyez/ui/ai_quotes.py +12 -12
  310. souleyez/ui/attack_surface.py +2439 -1516
  311. souleyez/ui/chain_rules_view.py +914 -382
  312. souleyez/ui/correlation_view.py +312 -230
  313. souleyez/ui/dashboard.py +2382 -1130
  314. souleyez/ui/deliverables_view.py +148 -62
  315. souleyez/ui/design_system.py +13 -13
  316. souleyez/ui/errors.py +49 -49
  317. souleyez/ui/evidence_linking_view.py +284 -179
  318. souleyez/ui/evidence_vault.py +393 -285
  319. souleyez/ui/exploit_suggestions_view.py +555 -349
  320. souleyez/ui/export_view.py +100 -66
  321. souleyez/ui/gap_analysis_view.py +315 -171
  322. souleyez/ui/help_system.py +105 -97
  323. souleyez/ui/intelligence_view.py +436 -293
  324. souleyez/ui/interactive.py +23142 -10430
  325. souleyez/ui/interactive_selector.py +75 -68
  326. souleyez/ui/log_formatter.py +47 -39
  327. souleyez/ui/menu_components.py +22 -13
  328. souleyez/ui/msf_auxiliary_menu.py +184 -133
  329. souleyez/ui/pending_chains_view.py +336 -172
  330. souleyez/ui/progress_indicators.py +5 -3
  331. souleyez/ui/recommendations_view.py +195 -137
  332. souleyez/ui/rule_builder.py +343 -225
  333. souleyez/ui/setup_wizard.py +678 -284
  334. souleyez/ui/shortcuts.py +217 -165
  335. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  336. souleyez/ui/splunk_vulns_view.py +139 -86
  337. souleyez/ui/team_dashboard.py +498 -335
  338. souleyez/ui/template_selector.py +196 -105
  339. souleyez/ui/terminal.py +6 -6
  340. souleyez/ui/timeline_view.py +198 -127
  341. souleyez/ui/tool_setup.py +264 -164
  342. souleyez/ui/tutorial.py +202 -72
  343. souleyez/ui/tutorial_state.py +40 -40
  344. souleyez/ui/wazuh_vulns_view.py +235 -141
  345. souleyez/ui/wordlist_browser.py +260 -107
  346. souleyez/ui.py +464 -312
  347. souleyez/utils/tool_checker.py +427 -367
  348. souleyez/utils.py +33 -29
  349. souleyez/wordlists.py +134 -167
  350. {souleyez-2.43.28.dist-info → souleyez-2.43.32.dist-info}/METADATA +1 -1
  351. souleyez-2.43.32.dist-info/RECORD +441 -0
  352. {souleyez-2.43.28.dist-info → souleyez-2.43.32.dist-info}/WHEEL +1 -1
  353. souleyez-2.43.28.dist-info/RECORD +0 -379
  354. {souleyez-2.43.28.dist-info → souleyez-2.43.32.dist-info}/entry_points.txt +0 -0
  355. {souleyez-2.43.28.dist-info → souleyez-2.43.32.dist-info}/licenses/LICENSE +0 -0
  356. {souleyez-2.43.28.dist-info → souleyez-2.43.32.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,601 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TheHarvester handler.
4
+
5
+ Consolidates parsing and display logic for theHarvester OSINT jobs.
6
+ """
7
+ import logging
8
+ import os
9
+ import re
10
+ from typing import Any, Dict, List, Optional
11
+ from urllib.parse import urlparse
12
+
13
+ import click
14
+
15
+ from souleyez.engine.job_status import STATUS_DONE, STATUS_NO_RESULTS
16
+ from souleyez.handlers.base import BaseToolHandler
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class TheHarvesterHandler(BaseToolHandler):
    """Handler for theHarvester OSINT jobs.

    Parses a theHarvester log into OSINT records (emails, hosts, IPs, URLs,
    ASNs), registers discovered IPs in the hosts table, and renders the
    done / warning / error / no-results views with ``click``.
    """

    tool_name = "theharvester"
    display_name = "TheHarvester"

    # All handlers enabled
    has_error_handler = True
    has_warning_handler = True
    has_no_results_handler = True
    has_done_handler = True

    # URL patterns for security concerns.
    # Each entry maps a concern type to the regexes searched in the URL path,
    # the label shown to the user, and the severity used for grouping.
    URL_PATTERNS = {
        "auth_endpoints": {
            "patterns": [
                r"/login",
                r"/signin",
                r"/auth",
                r"/oauth",
                r"/sso",
                r"/password",
                r"/forgot",
            ],
            "label": "Authentication endpoint",
            "severity": "medium",
        },
        "admin_panels": {
            "patterns": [
                r"/admin",
                r"/administrator",
                r"/manager",
                r"/console",
                r"/dashboard",
                r"/portal",
            ],
            "label": "Admin/management panel",
            "severity": "high",
        },
        "api_endpoints": {
            "patterns": [
                r"/api/",
                r"/api$",
                r"/graphql",
                r"/rest/",
                r"/v1/",
                r"/v2/",
                r"/swagger",
                r"/openapi",
            ],
            "label": "API endpoint",
            "severity": "medium",
        },
        "file_access": {
            "patterns": [
                r"/upload",
                r"/download",
                r"/files",
                r"/documents",
                r"/attachments",
            ],
            "label": "File access endpoint",
            "severity": "medium",
        },
        "sensitive_pages": {
            "patterns": [
                r"/config",
                r"/settings",
                r"/backup",
                r"/debug",
                r"/phpinfo",
                r"/info\.php",
                r"/test/",
            ],
            "label": "Potentially sensitive page",
            "severity": "high",
        },
    }

    # Subdomain patterns for security concerns.
    # Same layout as URL_PATTERNS; every regex is anchored at the start of
    # the host name, so only the left-most label is inspected.
    SUBDOMAIN_PATTERNS = {
        "dev_staging": {
            "patterns": [
                r"^dev\.",
                r"^staging\.",
                r"^stage\.",
                r"^test\.",
                r"^qa\.",
                r"^uat\.",
                r"^sandbox\.",
                r"^demo\.",
            ],
            "label": "Development/staging environment",
            "severity": "high",
        },
        "internal": {
            "patterns": [
                r"^internal\.",
                r"^intranet\.",
                r"^private\.",
                r"^corp\.",
                r"^vpn\.",
                r"^remote\.",
            ],
            "label": "Internal/corporate system",
            "severity": "high",
        },
        "infrastructure": {
            "patterns": [
                r"^mail\.",
                r"^smtp\.",
                r"^mx\.",
                r"^ftp\.",
                r"^sftp\.",
                r"^ns\d*\.",
                r"^dns\.",
            ],
            "label": "Infrastructure service",
            "severity": "medium",
        },
        "admin_systems": {
            "patterns": [
                r"^admin\.",
                r"^manage\.",
                r"^portal\.",
                r"^panel\.",
                r"^cms\.",
                r"^backend\.",
            ],
            "label": "Administrative system",
            "severity": "high",
        },
        "database": {
            "patterns": [
                r"^db\.",
                r"^database\.",
                r"^mysql\.",
                r"^postgres\.",
                r"^mongo\.",
                r"^redis\.",
                r"^elastic\.",
            ],
            "label": "Database system exposed",
            "severity": "high",
        },
        "cloud_services": {
            "patterns": [
                r"^api\.",
                r"^cdn\.",
                r"^static\.",
                r"^assets\.",
                r"^media\.",
                r"^storage\.",
                r"^s3\.",
            ],
            "label": "Cloud/CDN service",
            "severity": "low",
        },
    }

    def parse_job(
        self,
        engagement_id: int,
        log_path: str,
        job: Dict[str, Any],
        host_manager: Optional[Any] = None,
        findings_manager: Optional[Any] = None,
        credentials_manager: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """Parse theHarvester job results and store the extracted OSINT data.

        Args:
            engagement_id: Engagement the OSINT records and hosts are stored
                under.
            log_path: Path of the job log to read and parse.
            job: Job record; only its ``"target"`` key is read here.
            host_manager: Object exposing ``add_or_update_host``; a
                ``HostManager`` is created when omitted.
            findings_manager: Accepted for interface compatibility; unused.
            credentials_manager: Accepted for interface compatibility; unused.

        Returns:
            On success, a dict with ``tool``, ``status``, ``osint_added``,
            ``hosts_added``, ``stats``, ``domains``, ``target``, ``urls`` and
            ``ips``. On any failure, ``{"error": <message>}``; this method
            does not raise.
        """
        try:
            from souleyez.parsers.theharvester_parser import (
                get_osint_stats,
                parse_theharvester_output,
            )

            # Import managers if not provided
            if host_manager is None:
                from souleyez.storage.hosts import HostManager

                host_manager = HostManager()

            # Read the log file
            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                log_content = f.read()

            # Parse theHarvester output
            target = job.get("target", "")
            parsed = parse_theharvester_output(log_content, target)

            # Store OSINT data
            from souleyez.storage.osint import OsintManager

            om = OsintManager()
            osint_added = 0

            # (stored data type, key in the parsed result), in storage order.
            osint_types = (
                ("email", "emails"),
                ("host", "hosts"),
                ("ip", "ips"),
                ("url", "urls"),
                ("asn", "asns"),
            )
            for data_type, parsed_key in osint_types:
                values = parsed[parsed_key]
                if values:
                    osint_added += om.bulk_add_osint_data(
                        engagement_id, data_type, values, "theHarvester", target
                    )

            # Also add discovered IPs to the hosts table. Host names are only
            # stored as OSINT records above, not as host entries.
            hosts_added = 0
            for ip in parsed["ips"]:
                try:
                    host_id = host_manager.add_or_update_host(
                        engagement_id, {"ip": ip, "status": "discovered"}
                    )
                    hosts_added += 1
                    logger.debug(f"Added IP {ip} to hosts table (host_id={host_id})")
                except Exception as e:
                    # One bad IP must not abort the rest of the import.
                    logger.warning(f"Failed to add IP {ip} to hosts: {e}")

            stats = get_osint_stats(parsed)

            # Determine status based on results. ASNs are stored above but do
            # not count towards this total.
            total_osint_found = (
                len(parsed["emails"])
                + len(parsed["hosts"])
                + len(parsed["ips"])
                + len(parsed["urls"])
            )
            status = STATUS_DONE if total_osint_found > 0 else STATUS_NO_RESULTS

            return {
                "tool": "theHarvester",
                "status": status,
                "osint_added": osint_added,
                "hosts_added": hosts_added,
                "stats": stats,
                "domains": [target] if target else [],
                "target": target,
                "urls": parsed["urls"],
                "ips": parsed["ips"],
            }
        except Exception as e:
            logger.error(f"Error parsing theHarvester job: {e}")
            return {"error": str(e)}

    @staticmethod
    def _first_concern(
        text: str, item: str, category: str, concern_table: Dict[str, Dict]
    ) -> Optional[Dict]:
        """Return the first concern in *concern_table* whose pattern hits *text*.

        Concern types are tried in table order and patterns in list order, so
        an item yields at most one concern. Returns ``None`` when nothing
        matches.
        """
        for concern_type, concern_info in concern_table.items():
            for pattern in concern_info["patterns"]:
                if re.search(pattern, text, re.IGNORECASE):
                    return {
                        "item": item,
                        "type": concern_type,
                        "label": concern_info["label"],
                        "severity": concern_info["severity"],
                        "category": category,
                    }
        return None

    def _identify_security_concerns(
        self, urls: List[str], subdomains: List[str]
    ) -> List[Dict]:
        """Identify security concerns in discovered URLs and subdomains.

        URLs are matched on their lower-cased path against ``URL_PATTERNS``;
        URLs with an empty or root path, or that cannot be parsed, are
        skipped. Subdomains are matched lower-cased against
        ``SUBDOMAIN_PATTERNS``.

        Returns:
            One dict per flagged item with ``item``, ``type``, ``label``,
            ``severity`` and ``category`` (``"url"`` or ``"subdomain"``),
            URLs first, each group in input order.
        """
        security_concerns = []

        # Check URLs
        for url in urls:
            try:
                url_path = urlparse(url).path.lower()
            except Exception:
                # Best-effort: an unparsable entry is ignored, not fatal.
                continue
            if not url_path or url_path == "/":
                continue

            concern = self._first_concern(url_path, url, "url", self.URL_PATTERNS)
            if concern is not None:
                security_concerns.append(concern)

        # Check subdomains
        for sub in subdomains:
            concern = self._first_concern(
                sub.lower(), sub, "subdomain", self.SUBDOMAIN_PATTERNS
            )
            if concern is not None:
                security_concerns.append(concern)

        return security_concerns

    def display_done(
        self,
        job: Dict[str, Any],
        log_path: str,
        show_all: bool = False,
        show_passwords: bool = False,
    ) -> None:
        """Display successful theHarvester scan results.

        Re-parses the log at *log_path* and prints security concerns first,
        then the discovered assets. When nothing was discovered the
        no-results view is shown instead. Prints nothing if the log file is
        missing. Any exception is logged at debug level and suppressed.

        Args:
            job: Job record; only its ``"target"`` key is read here.
            log_path: Path of the job log to read and parse.
            show_all: When true, lists are printed in full instead of being
                truncated to 10 items (15 for URLs and hosts).
            show_passwords: Accepted for interface compatibility; unused.
        """
        try:
            from souleyez.parsers.theharvester_parser import parse_theharvester_output

            if not log_path or not os.path.exists(log_path):
                return

            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                log_content = f.read()
            parsed = parse_theharvester_output(log_content, job.get("target", ""))

            # Collect all results; `or []` guards against keys present but None.
            emails = parsed.get("emails", []) or []
            ips = parsed.get("ips", []) or []
            asns = parsed.get("asns", []) or []
            urls = parsed.get("urls", parsed.get("base_urls", [])) or []
            subdomains = parsed.get("subdomains", []) or []

            # Decide the empty case before printing anything:
            # display_no_results renders its own banner and target line, so
            # printing ours first would duplicate them.
            if not (emails or ips or asns or urls or subdomains):
                self.display_no_results(job, log_path)
                return

            # Run security analysis and display concerns FIRST
            security_concerns = self._identify_security_concerns(urls, subdomains)
            if security_concerns:
                self._display_security_concerns(security_concerns)

            # Display discovered assets
            click.echo(click.style("=" * 70, fg="cyan"))
            click.echo(click.style("DISCOVERED ASSETS", bold=True, fg="cyan"))
            click.echo(click.style("=" * 70, fg="cyan"))
            click.echo()

            click.echo(
                click.style(f"Target: {job.get('target', 'unknown')}", bold=True)
            )
            click.echo()

            max_items = None if show_all else 10
            max_urls = None if show_all else 15

            sections = (
                ("Emails", emails, max_items),
                ("IP Addresses", ips, max_items),
                ("ASNs", asns, max_items),
                ("Interesting URLs", urls, max_urls),
                ("Hosts Found", subdomains, max_urls),
            )
            for title, items, limit in sections:
                if items:
                    self._display_list(title, items, limit)

            click.echo(click.style("=" * 70, fg="cyan"))
            click.echo()

        except Exception as e:
            logger.debug(f"Error in display_done: {e}")

    def _display_list(
        self, title: str, items: List[str], max_items: Optional[int]
    ) -> None:
        """Display a list of items with optional truncation.

        Prints ``"<title>: <count>"``, then up to *max_items* entries (all of
        them when *max_items* is ``None``), then a "... and N more" line if
        entries were cut, then a blank line.
        """
        click.echo(click.style(f"{title}: {len(items)}", bold=True))
        display_items = items if max_items is None else items[:max_items]
        for item in display_items:
            click.echo(f" - {item}")
        if max_items and len(items) > max_items:
            click.echo(f" ... and {len(items) - max_items} more")
        click.echo()

    def _display_concern_group(
        self, heading: str, color: str, concerns: List[Dict], max_per_label: int
    ) -> None:
        """Print one severity group: a heading, then items grouped by label.

        At most *max_per_label* items are listed under each label, followed
        by a "... and N more" line when the label has more.
        """
        click.echo(click.style(heading, fg=color, bold=True))

        # Dicts keep insertion order, so labels appear in first-seen order.
        by_label: Dict[str, List[str]] = {}
        for concern in concerns:
            by_label.setdefault(concern["label"], []).append(concern["item"])

        for label, items in by_label.items():
            click.echo(f" {label}:")
            for item in items[:max_per_label]:
                click.echo(f" - {item}")
            if len(items) > max_per_label:
                click.echo(f" ... and {len(items) - max_per_label} more")
        click.echo()

    def _display_security_concerns(self, security_concerns: List[Dict]) -> None:
        """Display security concerns grouped by severity.

        Groups are printed in the order high, medium, low; empty groups are
        omitted. Concerns with any other severity value are not displayed.
        """
        click.echo(click.style("=" * 70, fg="red"))
        click.echo(click.style("SECURITY CONCERNS", bold=True, fg="red"))
        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

        # (severity, heading, heading colour, max items listed per label)
        severity_groups = (
            ("high", "[HIGH] Critical findings:", "red", 5),
            ("medium", "[MEDIUM] Notable findings:", "yellow", 5),
            ("low", "[LOW] Informational:", "bright_black", 3),
        )
        for severity, heading, color, max_per_label in severity_groups:
            matching = [c for c in security_concerns if c["severity"] == severity]
            if matching:
                self._display_concern_group(heading, color, matching, max_per_label)

        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

    def display_warning(
        self,
        job: Dict[str, Any],
        log_path: str,
        log_content: Optional[str] = None,
    ) -> None:
        """Display warning status for theHarvester scan.

        Prints a fixed notice pointing the user at the raw logs; none of the
        arguments are inspected.
        """
        click.echo(click.style("=" * 70, fg="yellow"))
        click.echo(click.style("[WARNING] THEHARVESTER SCAN", bold=True, fg="yellow"))
        click.echo(click.style("=" * 70, fg="yellow"))
        click.echo()
        click.echo(" Scan completed with warnings. Check raw logs for details.")
        click.echo(" Press [r] to view raw logs.")
        click.echo()
        click.echo(click.style("=" * 70, fg="yellow"))
        click.echo()

    @staticmethod
    def _summarize_error(log_content: Optional[str]) -> Optional[str]:
        """Map known theHarvester failure text in *log_content* to a message.

        Checks are applied in a fixed order and the first hit wins. As a last
        resort the text following the first ``[-]`` marker is returned,
        truncated to 100 characters. Returns ``None`` when the log is empty
        or nothing is recognised.
        """
        if not log_content:
            return None

        lowered = log_content.lower()  # computed once for all checks below

        if "No results found" in log_content:
            return "No results found for the specified domain"
        if "Could not resolve" in log_content or (
            "DNS" in log_content and "fail" in lowered
        ):
            return "Could not resolve domain"
        if "timed out" in lowered or "timeout" in lowered:
            return "Connection timed out - source may be slow"
        if "rate limit" in lowered or "blocked" in lowered:
            return "Rate limited or blocked by source"
        if "API" in log_content and ("key" in lowered or "error" in lowered):
            return "API key error - check your API keys configuration"
        if "[-]" in log_content:
            match = re.search(r"\[-\]\s*(.+?)(?:\n|$)", log_content)
            if match:
                return match.group(1).strip()[:100]
        return None

    def display_error(
        self,
        job: Dict[str, Any],
        log_path: str,
        log_content: Optional[str] = None,
    ) -> None:
        """Display error status for theHarvester scan.

        Args:
            job: Job record; not inspected here.
            log_path: Path of the job log, read only when *log_content* is
                ``None`` and the file exists.
            log_content: Log text already loaded by the caller, if any.
        """
        # Read log if not provided
        if log_content is None and log_path and os.path.exists(log_path):
            try:
                with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                    log_content = f.read()
            except Exception:
                # Best-effort: fall back to the generic failure message.
                log_content = ""

        click.echo(click.style("=" * 70, fg="red"))
        click.echo(click.style("[ERROR] THEHARVESTER FAILED", bold=True, fg="red"))
        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

        # Check for common theharvester errors
        error_msg = self._summarize_error(log_content)
        if error_msg:
            click.echo(f" {error_msg}")
        else:
            click.echo(" Scan failed - see raw logs for details (press 'r')")

        click.echo()
        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

    def display_no_results(
        self,
        job: Dict[str, Any],
        log_path: str,
    ) -> None:
        """Display no_results status for theHarvester scan.

        Prints the asset banner, the job target, a "no assets" notice and a
        few tips. *log_path* is not read.
        """
        click.echo(click.style("=" * 70, fg="cyan"))
        click.echo(click.style("DISCOVERED ASSETS", bold=True, fg="cyan"))
        click.echo(click.style("=" * 70, fg="cyan"))
        click.echo()

        click.echo(click.style(f"Target: {job.get('target', 'unknown')}", bold=True))
        click.echo()
        click.echo(click.style("Result: No assets discovered", fg="yellow", bold=True))
        click.echo()
        click.echo(" The scan completed without finding any publicly exposed assets.")
        click.echo()
        click.echo(click.style("Tips:", dim=True))
        click.echo(" - Try different data sources (-b google,bing,linkedin)")
        click.echo(" - Check if the domain is correct")
        click.echo(" - Some organizations have minimal public exposure")
        click.echo()
        click.echo(click.style("=" * 70, fg="cyan"))
        click.echo()