souleyez 2.43.29__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9564 -2881
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +564 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +409 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +417 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +913 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +219 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +237 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +23034 -10679
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
  353. souleyez-3.0.0.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,601 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TheHarvester handler.
4
+
5
+ Consolidates parsing and display logic for theHarvester OSINT jobs.
6
+ """
7
+ import logging
8
+ import os
9
+ import re
10
+ from typing import Any, Dict, List, Optional
11
+ from urllib.parse import urlparse
12
+
13
+ import click
14
+
15
+ from souleyez.engine.job_status import STATUS_DONE, STATUS_NO_RESULTS
16
+ from souleyez.handlers.base import BaseToolHandler
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class TheHarvesterHandler(BaseToolHandler):
    """Handler for theHarvester OSINT jobs.

    Responsibilities:

    * ``parse_job`` parses a finished job's log, stores the discovered
      OSINT records and registers discovered IPs as hosts.
    * ``display_*`` methods render the job outcome to the terminal via
      ``click``.
    * ``_identify_security_concerns`` flags URLs / host names whose shape
      suggests something worth a closer look (admin panels, dev hosts, ...).
    """

    tool_name = "theharvester"
    display_name = "TheHarvester"

    # All handlers enabled
    has_error_handler = True
    has_warning_handler = True
    has_no_results_handler = True
    has_done_handler = True

    # URL patterns for security concerns.  Each pattern is a regex that is
    # searched (case-insensitively) in the *path* component of a URL.
    URL_PATTERNS = {
        "auth_endpoints": {
            "patterns": [
                r"/login",
                r"/signin",
                r"/auth",
                r"/oauth",
                r"/sso",
                r"/password",
                r"/forgot",
            ],
            "label": "Authentication endpoint",
            "severity": "medium",
        },
        "admin_panels": {
            "patterns": [
                r"/admin",
                r"/administrator",
                r"/manager",
                r"/console",
                r"/dashboard",
                r"/portal",
            ],
            "label": "Admin/management panel",
            "severity": "high",
        },
        "api_endpoints": {
            "patterns": [
                r"/api/",
                r"/api$",
                r"/graphql",
                r"/rest/",
                r"/v1/",
                r"/v2/",
                r"/swagger",
                r"/openapi",
            ],
            "label": "API endpoint",
            "severity": "medium",
        },
        "file_access": {
            "patterns": [
                r"/upload",
                r"/download",
                r"/files",
                r"/documents",
                r"/attachments",
            ],
            "label": "File access endpoint",
            "severity": "medium",
        },
        "sensitive_pages": {
            "patterns": [
                r"/config",
                r"/settings",
                r"/backup",
                r"/debug",
                r"/phpinfo",
                r"/info\.php",
                r"/test/",
            ],
            "label": "Potentially sensitive page",
            "severity": "high",
        },
    }

    # Subdomain patterns for security concerns.  Each pattern is anchored at
    # the start of the (lower-cased) host name.
    SUBDOMAIN_PATTERNS = {
        "dev_staging": {
            "patterns": [
                r"^dev\.",
                r"^staging\.",
                r"^stage\.",
                r"^test\.",
                r"^qa\.",
                r"^uat\.",
                r"^sandbox\.",
                r"^demo\.",
            ],
            "label": "Development/staging environment",
            "severity": "high",
        },
        "internal": {
            "patterns": [
                r"^internal\.",
                r"^intranet\.",
                r"^private\.",
                r"^corp\.",
                r"^vpn\.",
                r"^remote\.",
            ],
            "label": "Internal/corporate system",
            "severity": "high",
        },
        "infrastructure": {
            "patterns": [
                r"^mail\.",
                r"^smtp\.",
                r"^mx\.",
                r"^ftp\.",
                r"^sftp\.",
                r"^ns\d*\.",
                r"^dns\.",
            ],
            "label": "Infrastructure service",
            "severity": "medium",
        },
        "admin_systems": {
            "patterns": [
                r"^admin\.",
                r"^manage\.",
                r"^portal\.",
                r"^panel\.",
                r"^cms\.",
                r"^backend\.",
            ],
            "label": "Administrative system",
            "severity": "high",
        },
        "database": {
            "patterns": [
                r"^db\.",
                r"^database\.",
                r"^mysql\.",
                r"^postgres\.",
                r"^mongo\.",
                r"^redis\.",
                r"^elastic\.",
            ],
            "label": "Database system exposed",
            "severity": "high",
        },
        "cloud_services": {
            "patterns": [
                r"^api\.",
                r"^cdn\.",
                r"^static\.",
                r"^assets\.",
                r"^media\.",
                r"^storage\.",
                r"^s3\.",
            ],
            "label": "Cloud/CDN service",
            "severity": "low",
        },
    }

    # (OSINT record type, key in the parsed result) in the order the records
    # are stored by ``parse_job``.
    _OSINT_TYPES = (
        ("email", "emails"),
        ("host", "hosts"),
        ("ip", "ips"),
        ("url", "urls"),
        ("asn", "asns"),
    )

    # (severity, heading, colour, max items shown per label) in display order.
    _SEVERITY_SECTIONS = (
        ("high", "[HIGH] Critical findings:", "red", 5),
        ("medium", "[MEDIUM] Notable findings:", "yellow", 5),
        ("low", "[LOW] Informational:", "bright_black", 3),
    )

    # ------------------------------------------------------------------ #
    # Parsing
    # ------------------------------------------------------------------ #

    def parse_job(
        self,
        engagement_id: int,
        log_path: str,
        job: Dict[str, Any],
        host_manager: Optional[Any] = None,
        findings_manager: Optional[Any] = None,
        credentials_manager: Optional[Any] = None,
    ) -> Dict[str, Any]:
        """Parse theHarvester job results and persist the OSINT data.

        Args:
            engagement_id: Engagement the discovered data is stored under.
            log_path: Path of the job's log file.
            job: Job record; only ``job["target"]`` is read here.
            host_manager: Object providing ``add_or_update_host``; a
                ``HostManager`` is created when omitted.
            findings_manager: Unused by this handler.
            credentials_manager: Unused by this handler.

        Returns:
            On success a dict with ``tool``, ``status``, ``osint_added``,
            ``hosts_added``, ``stats``, ``domains``, ``target``, ``urls`` and
            ``ips``.  On any failure ``{"error": <message>}``.
        """
        try:
            from souleyez.parsers.theharvester_parser import (
                get_osint_stats,
                parse_theharvester_output,
            )

            # Import managers if not provided
            if host_manager is None:
                from souleyez.storage.hosts import HostManager

                host_manager = HostManager()

            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                log_content = f.read()

            target = job.get("target", "")
            parsed = parse_theharvester_output(log_content, target)

            # Store OSINT data, one bulk insert per non-empty record type.
            from souleyez.storage.osint import OsintManager

            om = OsintManager()
            osint_added = 0
            for data_type, key in self._OSINT_TYPES:
                values = parsed[key]
                if values:
                    osint_added += om.bulk_add_osint_data(
                        engagement_id, data_type, values, "theHarvester", target
                    )

            # Also register discovered IPs in the hosts table.  Host names are
            # only stored as OSINT records above.  A failure on one IP must
            # not abort the rest, hence the per-item guard.
            hosts_added = 0
            for ip in parsed["ips"]:
                try:
                    host_id = host_manager.add_or_update_host(
                        engagement_id, {"ip": ip, "status": "discovered"}
                    )
                    hosts_added += 1
                    logger.debug(
                        "Added IP %s to hosts table (host_id=%s)", ip, host_id
                    )
                except Exception as e:
                    logger.warning("Failed to add IP %s to hosts: %s", ip, e)

            stats = get_osint_stats(parsed)

            # Determine status based on results.
            # NOTE(review): ASNs are stored above but do not count towards
            # the done/no-results decision - confirm this is intentional.
            total_osint_found = sum(
                len(parsed[key]) for key in ("emails", "hosts", "ips", "urls")
            )
            status = STATUS_DONE if total_osint_found > 0 else STATUS_NO_RESULTS

            return {
                "tool": "theHarvester",
                "status": status,
                "osint_added": osint_added,
                "hosts_added": hosts_added,
                "stats": stats,
                "domains": [target] if target else [],
                "target": target,
                "urls": parsed["urls"],
                "ips": parsed["ips"],
            }
        except Exception as e:
            logger.error(f"Error parsing theHarvester job: {e}")
            return {"error": str(e)}

    # ------------------------------------------------------------------ #
    # Security analysis
    # ------------------------------------------------------------------ #

    @staticmethod
    def _classify(
        item: str,
        text: str,
        pattern_groups: Dict[str, Dict[str, Any]],
        category: str,
    ) -> Optional[Dict[str, Any]]:
        """Return a concern record for the first pattern group matching *text*.

        Groups are tried in dict order; ``None`` is returned when no group
        matches.  *item* is the original value reported in the record.
        """
        for concern_type, info in pattern_groups.items():
            if any(re.search(p, text, re.IGNORECASE) for p in info["patterns"]):
                return {
                    "item": item,
                    "type": concern_type,
                    "label": info["label"],
                    "severity": info["severity"],
                    "category": category,
                }
        return None

    def _identify_security_concerns(
        self, urls: List[str], subdomains: List[str]
    ) -> List[Dict]:
        """Identify security concerns in discovered URLs and subdomains.

        Each URL / subdomain yields at most one concern (the first matching
        group of ``URL_PATTERNS`` / ``SUBDOMAIN_PATTERNS``).  URL concerns
        come first in the returned list, followed by subdomain concerns.
        """
        security_concerns = []

        # Check URLs: only the path is inspected; root / empty paths carry
        # no signal and unparseable URLs are skipped.
        for url in urls:
            try:
                url_path = urlparse(url).path.lower()
            except Exception:
                continue
            if not url_path or url_path == "/":
                continue

            concern = self._classify(url, url_path, self.URL_PATTERNS, "url")
            if concern is not None:
                security_concerns.append(concern)

        # Check subdomains
        for sub in subdomains:
            concern = self._classify(
                sub, sub.lower(), self.SUBDOMAIN_PATTERNS, "subdomain"
            )
            if concern is not None:
                security_concerns.append(concern)

        return security_concerns

    # ------------------------------------------------------------------ #
    # Display helpers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _display_banner(title: str, fg: str) -> None:
        """Print a 70-column ruled banner with *title*, then a blank line."""
        rule = "=" * 70
        click.echo(click.style(rule, fg=fg))
        click.echo(click.style(title, bold=True, fg=fg))
        click.echo(click.style(rule, fg=fg))
        click.echo()

    def _display_list(
        self, title: str, items: List[str], max_items: Optional[int]
    ) -> None:
        """Display a list of items with optional truncation.

        ``max_items=None`` shows everything; otherwise at most ``max_items``
        entries are printed followed by a "... and N more" line.
        """
        click.echo(click.style(f"{title}: {len(items)}", bold=True))
        display_items = items if max_items is None else items[:max_items]
        for item in display_items:
            click.echo(f" - {item}")
        if max_items and len(items) > max_items:
            click.echo(f" ... and {len(items) - max_items} more")
        click.echo()

    def _display_security_concerns(self, security_concerns: List[Dict]) -> None:
        """Display security concerns grouped by severity, then by label.

        Severities are shown in the order high, medium, low; concerns with
        any other severity are not displayed.
        """
        self._display_banner("SECURITY CONCERNS", "red")

        for severity, heading, colour, limit in self._SEVERITY_SECTIONS:
            # Group this severity's items by label, keeping first-seen order.
            by_label = {}
            for concern in security_concerns:
                if concern["severity"] == severity:
                    by_label.setdefault(concern["label"], []).append(
                        concern["item"]
                    )
            if not by_label:
                continue

            click.echo(click.style(heading, fg=colour, bold=True))
            for label, items in by_label.items():
                click.echo(f" {label}:")
                for item in items[:limit]:
                    click.echo(f" - {item}")
                if len(items) > limit:
                    click.echo(f" ... and {len(items) - limit} more")
            click.echo()

        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

    @staticmethod
    def _classify_error(log_content: Optional[str]) -> Optional[str]:
        """Map raw log text to a short human-readable error, if recognisable.

        Checks are ordered from most to least specific; the final fallback
        extracts the first ``[-]`` line (truncated to 100 characters).
        Returns ``None`` when nothing matches or the log is empty.
        """
        if not log_content:
            return None

        lowered = log_content.lower()
        if "No results found" in log_content:
            return "No results found for the specified domain"
        if "Could not resolve" in log_content or (
            "DNS" in log_content and "fail" in lowered
        ):
            return "Could not resolve domain"
        if "timed out" in lowered or "timeout" in lowered:
            return "Connection timed out - source may be slow"
        if "rate limit" in lowered or "blocked" in lowered:
            return "Rate limited or blocked by source"
        if "API" in log_content and ("key" in lowered or "error" in lowered):
            return "API key error - check your API keys configuration"
        if "[-]" in log_content:
            match = re.search(r"\[-\]\s*(.+?)(?:\n|$)", log_content)
            if match:
                return match.group(1).strip()[:100]
        return None

    # ------------------------------------------------------------------ #
    # Status displays
    # ------------------------------------------------------------------ #

    def display_done(
        self,
        job: Dict[str, Any],
        log_path: str,
        show_all: bool = False,
        show_passwords: bool = False,
    ) -> None:
        """Display successful theHarvester scan results.

        Args:
            job: Job record; ``job["target"]`` is shown and passed to the
                parser.
            log_path: Path of the job's log; nothing is printed when it is
                empty or does not exist.
            show_all: When true, lists are not truncated.
            show_passwords: Unused by this handler.

        Any exception is logged at debug level and swallowed so a display
        problem never breaks the caller.
        """
        try:
            from souleyez.parsers.theharvester_parser import parse_theharvester_output

            if not log_path or not os.path.exists(log_path):
                return

            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                log_content = f.read()
            parsed = parse_theharvester_output(log_content, job.get("target", ""))

            # Collect all results
            emails = parsed.get("emails", [])
            ips = parsed.get("ips", [])
            asns = parsed.get("asns", [])
            urls = parsed.get("urls", parsed.get("base_urls", []))
            # parse_job stores host names from parsed["hosts"]; fall back to
            # that key so a job marked done because of hosts still shows them.
            # NOTE(review): assumes "hosts" holds displayable host names -
            # confirm against the parser.
            subdomains = parsed.get("subdomains") or parsed.get("hosts", [])

            # Nothing to show: delegate entirely, before printing any banner,
            # so the "DISCOVERED ASSETS" header is not emitted twice.
            if not (emails or ips or asns or urls or subdomains):
                self.display_no_results(job, log_path)
                return

            # Display security concerns FIRST
            security_concerns = self._identify_security_concerns(urls, subdomains)
            if security_concerns:
                self._display_security_concerns(security_concerns)

            # Display discovered assets
            self._display_banner("DISCOVERED ASSETS", "cyan")
            click.echo(
                click.style(f"Target: {job.get('target', 'unknown')}", bold=True)
            )
            click.echo()

            max_items = None if show_all else 10
            max_urls = None if show_all else 15
            sections = (
                ("Emails", emails, max_items),
                ("IP Addresses", ips, max_items),
                ("ASNs", asns, max_items),
                ("Interesting URLs", urls, max_urls),
                ("Hosts Found", subdomains, max_urls),
            )
            for title, items, limit in sections:
                if items:
                    self._display_list(title, items, limit)

            click.echo(click.style("=" * 70, fg="cyan"))
            click.echo()

        except Exception as e:
            logger.debug(f"Error in display_done: {e}")

    def display_warning(
        self,
        job: Dict[str, Any],
        log_path: str,
        log_content: Optional[str] = None,
    ) -> None:
        """Display warning status for theHarvester scan.

        The arguments are accepted for interface compatibility; the message
        is static.
        """
        self._display_banner("[WARNING] THEHARVESTER SCAN", "yellow")
        click.echo(" Scan completed with warnings. Check raw logs for details.")
        click.echo(" Press [r] to view raw logs.")
        click.echo()
        click.echo(click.style("=" * 70, fg="yellow"))
        click.echo()

    def display_error(
        self,
        job: Dict[str, Any],
        log_path: str,
        log_content: Optional[str] = None,
    ) -> None:
        """Display error status for theHarvester scan.

        Args:
            job: Job record (not read here).
            log_path: Path of the job's log, read only when *log_content*
                is ``None``.
            log_content: Already-loaded log text, if available.
        """
        # Read log if not provided; an unreadable log degrades to the generic
        # failure message instead of raising.
        if log_content is None and log_path and os.path.exists(log_path):
            try:
                with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                    log_content = f.read()
            except Exception:
                log_content = ""

        self._display_banner("[ERROR] THEHARVESTER FAILED", "red")

        # Check for common theharvester errors
        error_msg = self._classify_error(log_content)
        if error_msg:
            click.echo(f" {error_msg}")
        else:
            click.echo(" Scan failed - see raw logs for details (press 'r')")

        click.echo()
        click.echo(click.style("=" * 70, fg="red"))
        click.echo()

    def display_no_results(
        self,
        job: Dict[str, Any],
        log_path: str,
    ) -> None:
        """Display no_results status for theHarvester scan.

        Prints the target, a "no assets" notice and a few tips; *log_path*
        is accepted for interface compatibility and not read.
        """
        self._display_banner("DISCOVERED ASSETS", "cyan")

        click.echo(click.style(f"Target: {job.get('target', 'unknown')}", bold=True))
        click.echo()
        click.echo(click.style("Result: No assets discovered", fg="yellow", bold=True))
        click.echo()
        click.echo(" The scan completed without finding any publicly exposed assets.")
        click.echo()
        click.echo(click.style("Tips:", dim=True))
        click.echo(" - Try different data sources (-b google,bing,linkedin)")
        click.echo(" - Check if the domain is correct")
        click.echo(" - Some organizations have minimal public exposure")
        click.echo()
        click.echo(click.style("=" * 70, fg="cyan"))
        click.echo()