souleyez 2.16.0__py3-none-any.whl → 2.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. souleyez/__init__.py +1 -1
  2. souleyez/assets/__init__.py +1 -0
  3. souleyez/assets/souleyez-icon.png +0 -0
  4. souleyez/core/msf_sync_manager.py +15 -5
  5. souleyez/core/tool_chaining.py +221 -29
  6. souleyez/detection/validator.py +4 -2
  7. souleyez/docs/README.md +2 -2
  8. souleyez/docs/user-guide/installation.md +14 -1
  9. souleyez/engine/background.py +25 -1
  10. souleyez/engine/result_handler.py +129 -0
  11. souleyez/integrations/siem/splunk.py +58 -11
  12. souleyez/main.py +103 -4
  13. souleyez/parsers/crackmapexec_parser.py +101 -43
  14. souleyez/parsers/dnsrecon_parser.py +50 -35
  15. souleyez/parsers/enum4linux_parser.py +101 -21
  16. souleyez/parsers/http_fingerprint_parser.py +319 -0
  17. souleyez/parsers/hydra_parser.py +56 -5
  18. souleyez/parsers/impacket_parser.py +123 -44
  19. souleyez/parsers/john_parser.py +47 -14
  20. souleyez/parsers/msf_parser.py +20 -5
  21. souleyez/parsers/nmap_parser.py +145 -28
  22. souleyez/parsers/smbmap_parser.py +69 -25
  23. souleyez/parsers/sqlmap_parser.py +72 -26
  24. souleyez/parsers/theharvester_parser.py +21 -13
  25. souleyez/plugins/gobuster.py +96 -3
  26. souleyez/plugins/http_fingerprint.py +592 -0
  27. souleyez/plugins/msf_exploit.py +6 -3
  28. souleyez/plugins/nuclei.py +41 -17
  29. souleyez/ui/interactive.py +130 -20
  30. souleyez/ui/setup_wizard.py +424 -58
  31. souleyez/ui/tool_setup.py +52 -52
  32. souleyez/utils/tool_checker.py +75 -13
  33. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/METADATA +16 -3
  34. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/RECORD +38 -34
  35. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/WHEEL +0 -0
  36. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/entry_points.txt +0 -0
  37. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/licenses/LICENSE +0 -0
  38. {souleyez-2.16.0.dist-info → souleyez-2.26.0.dist-info}/top_level.txt +0 -0
@@ -56,47 +56,62 @@ def parse_dnsrecon_output(output: str, target: str = "") -> Dict[str, Any]:
56
56
  if not line_stripped or line_stripped.startswith('[-]'):
57
57
  continue
58
58
 
59
- # Parse lines starting with [*] - these contain DNS records
60
- # Format: [*] A cybersoulsecurity.com 198.185.159.144
61
- # Format: [*] NS ns04.squarespacedns.com 45.54.22.193
62
- # Format: [*] MX alt1.aspmx.l.google.com 192.178.164.26
63
- # Format: [*] SOA dns1.p01.nsone.net 198.51.44.1
59
+ # Parse DNS records from dnsrecon output
60
+ # Old format: [*] A cybersoulsecurity.com 198.185.159.144
61
+ # New format: 2026-01-08T13:50:16.302153-1000 INFO SOA dns1.p01.nsone.net 198.51.44.1
62
+ # New format: 2026-01-08T13:50:17.112742-1000 INFO NS dns4.p01.nsone.net 198.51.45.65
63
+
64
+ record_type = None
65
+ hostname = None
66
+ ip = None
64
67
 
65
68
  if line_stripped.startswith('[*]'):
69
+ # Old format: [*] <type> <hostname> <ip>
66
70
  parts = line_stripped.split()
67
- if len(parts) >= 4: # [*] <type> <hostname> <ip>
71
+ if len(parts) >= 4:
68
72
  record_type = parts[1]
69
73
  hostname = parts[2].lower()
70
74
  ip = parts[3] if len(parts) > 3 else ''
71
-
72
- # Validate IP (both IPv4 and basic IPv6)
73
- is_ipv4 = re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip)
74
- # is_ipv6 = ':' in ip # not used currently
75
-
76
- if record_type == 'A' and is_ipv4:
77
- if hostname not in seen_hosts:
78
- seen_hosts.add(hostname)
79
- result['hosts'].append({
80
- 'hostname': hostname,
81
- 'ip': ip,
82
- 'type': 'A'
83
- })
84
- if hostname != target and hostname not in seen_subdomains:
85
- seen_subdomains.add(hostname)
86
- result['subdomains'].append(hostname)
87
-
88
- elif record_type == 'NS':
89
- if hostname not in result['nameservers']:
90
- result['nameservers'].append(hostname)
91
-
92
- elif record_type == 'MX':
93
- if hostname not in result['mail_servers']:
94
- result['mail_servers'].append(hostname)
95
-
96
- elif record_type == 'SOA':
97
- # SOA records can also be nameservers
98
- if hostname not in result['nameservers']:
99
- result['nameservers'].append(hostname)
75
+ elif ' INFO ' in line_stripped:
76
+ # New format: TIMESTAMP INFO <type> <hostname> <ip>
77
+ # Split on INFO and parse the rest
78
+ info_idx = line_stripped.find(' INFO ')
79
+ if info_idx != -1:
80
+ record_part = line_stripped[info_idx + 6:].strip()
81
+ parts = record_part.split()
82
+ if len(parts) >= 3:
83
+ record_type = parts[0]
84
+ hostname = parts[1].lower()
85
+ ip = parts[2] if len(parts) > 2 else ''
86
+
87
+ if record_type and hostname:
88
+ # Validate IP (both IPv4 and basic IPv6)
89
+ is_ipv4 = re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', ip) if ip else False
90
+
91
+ if record_type == 'A' and is_ipv4:
92
+ if hostname not in seen_hosts:
93
+ seen_hosts.add(hostname)
94
+ result['hosts'].append({
95
+ 'hostname': hostname,
96
+ 'ip': ip,
97
+ 'type': 'A'
98
+ })
99
+ if hostname != target and hostname not in seen_subdomains:
100
+ seen_subdomains.add(hostname)
101
+ result['subdomains'].append(hostname)
102
+
103
+ elif record_type == 'NS':
104
+ if hostname not in result['nameservers']:
105
+ result['nameservers'].append(hostname)
106
+
107
+ elif record_type == 'MX':
108
+ if hostname not in result['mail_servers']:
109
+ result['mail_servers'].append(hostname)
110
+
111
+ elif record_type == 'SOA':
112
+ # SOA records can also be nameservers
113
+ if hostname not in result['nameservers']:
114
+ result['nameservers'].append(hostname)
100
115
 
101
116
  # Parse subdomain brute force results: [*] Subdomain: api.example.com IP: 1.2.3.4
102
117
  subdomain_match = re.search(r'Subdomain:\s+(\S+)\s+IP:\s+(\d+\.\d+\.\d+\.\d+)', line_stripped)
@@ -51,16 +51,38 @@ def parse_enum4linux_output(output: str, target: str = "") -> Dict[str, Any]:
51
51
 
52
52
  def _is_enum4linux_ng_output(output: str) -> bool:
53
53
  """Detect if output is from enum4linux-ng (YAML-style format)."""
54
- # enum4linux-ng indicators - look for its specific patterns
54
+ # Primary indicator - explicit version string (most reliable)
55
+ if re.search(r'ENUM4LINUX\s*-\s*next\s*generation', output, re.IGNORECASE):
56
+ return True
57
+ if re.search(r'enum4linux-ng', output, re.IGNORECASE):
58
+ return True
59
+
60
+ # Secondary indicators - look for YAML-style patterns unique to ng
55
61
  ng_indicators = [
56
- re.search(r'ENUM4LINUX - next generation', output),
57
- re.search(r'After merging user results', output),
58
- re.search(r'After merging share results', output),
59
- re.search(r'^\s+username:\s+', output, re.MULTILINE),
60
- re.search(r"^'\d+':\s*$", output, re.MULTILINE), # RID entries like '1000':
62
+ re.search(r'After merging (user|share|group) results', output, re.IGNORECASE),
63
+ re.search(r'^\s{2,}username:\s+', output, re.MULTILINE), # Indented YAML-style
64
+ re.search(r"^'?\d+'?:\s*$", output, re.MULTILINE), # RID entries: '1000': or 1000:
65
+ re.search(r'^\s{2,}(groupname|name|type|comment):\s+', output, re.MULTILINE),
66
+ re.search(r'Trying to get SID from lsaquery', output, re.IGNORECASE),
67
+ ]
68
+
69
+ # Classic enum4linux indicators (to confirm it's NOT ng)
70
+ classic_indicators = [
71
+ re.search(r'enum4linux v\d', output, re.IGNORECASE),
72
+ re.search(r'Starting enum4linux v', output, re.IGNORECASE),
73
+ re.search(r'Sharename\s+Type\s+Comment', output), # Table header
74
+ re.search(r'\|\s+Users on', output),
61
75
  ]
62
- # If we find at least 2 indicators, it's probably enum4linux-ng
63
- return sum(1 for ind in ng_indicators if ind) >= 2
76
+
77
+ ng_count = sum(1 for ind in ng_indicators if ind)
78
+ classic_count = sum(1 for ind in classic_indicators if ind)
79
+
80
+ # If we have classic indicators and no/few ng indicators, it's classic
81
+ if classic_count >= 2 and ng_count < 2:
82
+ return False
83
+
84
+ # If we find at least 2 ng indicators, it's probably enum4linux-ng
85
+ return ng_count >= 2
64
86
 
65
87
 
66
88
  def _parse_enum4linux_ng_output(output: str, target: str = "") -> Dict[str, Any]:
@@ -297,21 +319,41 @@ def _parse_enum4linux_classic_output(output: str, target: str = "") -> Dict[str,
297
319
 
298
320
  # Parse user lines from RID cycling output (Local User or Domain User)
299
321
  elif current_section == 'users' and line and not line.startswith('='):
300
- # Format: "S-1-5-21-...-RID DOMAIN\username (Local User)"
301
- user_match = re.match(r'S-1-5-21-[0-9-]+ \S+\\(\S+) \((Local User|Domain User)\)', line)
322
+ # Format variations:
323
+ # "S-1-5-21-...-RID DOMAIN\username (Local User)"
324
+ # "S-1-5-21-...-RID DOMAIN\\username (Local User)"
325
+ # "username (Local User)" - simplified format
326
+ # "[+] DOMAIN\username" - alternate prefix
327
+
328
+ # Try full SID format first (flexible escaping)
329
+ user_match = re.search(r'S-1-5-21-[\d-]+\s+\S+[\\]+(\S+)\s+\((Local|Domain)\s*User\)', line, re.IGNORECASE)
302
330
  if user_match:
303
331
  username = user_match.group(1)
304
- if username not in result['users']:
332
+ if username and username not in result['users']:
305
333
  result['users'].append(username)
306
-
307
- # Also parse group lines from RID cycling (Domain Group)
334
+ else:
335
+ # Try simpler DOMAIN\username format
336
+ user_match = re.search(r'[\[\+\]\s]*\S+[\\]+(\S+)\s+\((Local|Domain)\s*User\)', line, re.IGNORECASE)
337
+ if user_match:
338
+ username = user_match.group(1)
339
+ if username and username not in result['users']:
340
+ result['users'].append(username)
341
+
342
+ # Also parse group lines from RID cycling (Domain Group, Local Group)
308
343
  elif current_section == 'groups' and line and not line.startswith('='):
309
- # Format: "S-1-5-21-...-RID DOMAIN\groupname (Domain Group)"
310
- group_match = re.match(r'S-1-5-21-[0-9-]+ \S+\\(\S+) \(Domain Group\)', line)
344
+ # Format variations similar to users
345
+ group_match = re.search(r'S-1-5-21-[\d-]+\s+\S+[\\]+(\S+)\s+\((Domain|Local)\s*Group\)', line, re.IGNORECASE)
311
346
  if group_match:
312
347
  groupname = group_match.group(1)
313
- if groupname not in result['groups']:
348
+ if groupname and groupname not in result['groups']:
314
349
  result['groups'].append(groupname)
350
+ else:
351
+ # Try simpler format
352
+ group_match = re.search(r'[\[\+\]\s]*\S+[\\]+(\S+)\s+\((Domain|Local)\s*Group\)', line, re.IGNORECASE)
353
+ if group_match:
354
+ groupname = group_match.group(1)
355
+ if groupname and groupname not in result['groups']:
356
+ result['groups'].append(groupname)
315
357
 
316
358
  return result
317
359
 
@@ -322,23 +364,61 @@ def _parse_share_line(line: str) -> Dict[str, Any]:
322
364
 
323
365
  Example: "print$ Disk Printer Drivers"
324
366
  Example: "tmp Disk oh noes!"
367
+ Example: "IPC$ IPC IPC Service (Samba)"
325
368
  """
326
- # Split on multiple whitespace
327
- parts = re.split(r'\s{2,}', line.strip())
369
+ line = line.strip()
370
+ if not line:
371
+ return None
328
372
 
373
+ # Try multiple parsing strategies for different formats
374
+
375
+ # Strategy 1: Split on 2+ whitespace (most common)
376
+ parts = re.split(r'\s{2,}', line)
377
+ if len(parts) >= 2:
378
+ share_name = parts[0].strip()
379
+ share_type = parts[1].strip()
380
+ comment = parts[2].strip() if len(parts) > 2 else ''
381
+
382
+ # Validate share type is a known type
383
+ if share_type.upper() in ['DISK', 'IPC', 'PRINT', 'PRINTER', 'COMM', 'DEVICE']:
384
+ return {
385
+ 'name': share_name,
386
+ 'type': share_type,
387
+ 'comment': comment,
388
+ 'mapping': None,
389
+ 'listing': None,
390
+ 'writing': None
391
+ }
392
+
393
+ # Strategy 2: Tab-separated
394
+ parts = line.split('\t')
329
395
  if len(parts) >= 2:
330
396
  share_name = parts[0].strip()
331
397
  share_type = parts[1].strip()
332
398
  comment = parts[2].strip() if len(parts) > 2 else ''
333
399
 
400
+ if share_type.upper() in ['DISK', 'IPC', 'PRINT', 'PRINTER', 'COMM', 'DEVICE']:
401
+ return {
402
+ 'name': share_name,
403
+ 'type': share_type,
404
+ 'comment': comment,
405
+ 'mapping': None,
406
+ 'listing': None,
407
+ 'writing': None
408
+ }
409
+
410
+ # Strategy 3: Regex for flexible whitespace (single space minimum)
411
+ match = re.match(r'^(\S+)\s+(Disk|IPC|Print|Printer|Comm|Device)\s*(.*)?$', line, re.IGNORECASE)
412
+ if match:
334
413
  return {
335
- 'name': share_name,
336
- 'type': share_type,
337
- 'comment': comment,
414
+ 'name': match.group(1),
415
+ 'type': match.group(2),
416
+ 'comment': match.group(3).strip() if match.group(3) else '',
338
417
  'mapping': None,
339
418
  'listing': None,
340
419
  'writing': None
341
420
  }
421
+
342
422
  return None
343
423
 
344
424
 
@@ -0,0 +1,319 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ souleyez.parsers.http_fingerprint_parser
4
+
5
+ Parses HTTP fingerprint output to extract WAF, CDN, managed hosting,
6
+ and technology information.
7
+ """
8
+ import json
9
+ import re
10
+ from typing import Dict, Any, List, Optional
11
+
12
+
13
def parse_http_fingerprint_output(output: str, target: str = "") -> Dict[str, Any]:
    """
    Parse HTTP fingerprint output and extract detection results.

    Prefers an embedded JSON payload (between JSON_RESULT markers) when one
    is present; otherwise falls back to line-oriented text parsing.

    Args:
        output: Raw output from http_fingerprint plugin
        target: Target URL from job

    Returns:
        Dict with keys: 'target', 'status_code', 'server', 'server_version',
        'waf' (list), 'cdn' (list), 'managed_hosting', 'technologies' (list),
        'tls' (dict or None), 'headers' (dict), 'redirect_url', 'error'.
    """
    parsed = {
        'target': target,
        'status_code': None,
        'server': None,
        'server_version': None,
        'waf': [],
        'cdn': [],
        'managed_hosting': None,
        'technologies': [],
        'tls': None,
        'headers': {},
        'redirect_url': None,
        'error': None,
    }

    # The JSON block, when present, is the most reliable source of truth.
    json_block = re.search(r'=== JSON_RESULT ===\n(.+?)\n=== END_JSON_RESULT ===', output, re.DOTALL)
    if json_block:
        try:
            payload = json.loads(json_block.group(1))
        except json.JSONDecodeError:
            payload = None  # malformed JSON: fall through to text parsing
        if payload is not None:
            parsed.update(payload)
            parsed['target'] = target or parsed.get('target', '')
            return parsed

    # Text fallback: scan the output line by line.
    for raw_line in output.split('\n'):
        text = raw_line.strip()

        if text.startswith('HTTP Status:'):
            status = re.search(r'HTTP Status:\s+(\d+)', text)
            if status:
                parsed['status_code'] = int(status.group(1))

        elif text.startswith('Server:'):
            parsed['server'] = text.replace('Server:', '').strip()

        elif text.startswith('Redirected to:'):
            parsed['redirect_url'] = text.replace('Redirected to:', '').strip()

        elif text.startswith('TLS:'):
            tls = re.search(r'TLS:\s+(\S+)\s+\((.+?)\)', text)
            if tls:
                parsed['tls'] = {
                    'version': tls.group(1),
                    'cipher': tls.group(2),
                }

        elif text.startswith('MANAGED HOSTING DETECTED:'):
            parsed['managed_hosting'] = text.replace('MANAGED HOSTING DETECTED:', '').strip()

        # Section headers; their "- item" entries are handled below.
        elif text.startswith('WAF/Protection Detected:'):
            continue
        elif text.startswith('CDN Detected:'):
            continue
        elif text.startswith('Technologies:'):
            continue

        elif text.startswith('- '):
            entry = text[2:].strip()
            lowered = entry.lower()
            # Keyword heuristic to bucket list entries - the JSON path is
            # more reliable; this is best-effort only.
            if any(kw in lowered for kw in ['waf', 'cloudflare', 'akamai', 'imperva', 'sucuri', 'f5']):
                if entry not in parsed['waf']:
                    parsed['waf'].append(entry)
            elif any(kw in lowered for kw in ['cdn', 'cloudfront', 'fastly', 'varnish', 'edge']):
                if entry not in parsed['cdn']:
                    parsed['cdn'].append(entry)
            elif entry not in parsed['technologies']:
                parsed['technologies'].append(entry)

        elif text.startswith('ERROR:'):
            parsed['error'] = text.replace('ERROR:', '').strip()

    return parsed
129
+
130
+
131
def is_managed_hosting(parsed_data: Dict[str, Any]) -> bool:
    """
    Check if target is a managed hosting platform.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        True if managed hosting platform detected
    """
    platform = parsed_data.get('managed_hosting')
    return platform is not None
142
+
143
+
144
def get_managed_hosting_platform(parsed_data: Dict[str, Any]) -> Optional[str]:
    """
    Get the name of the managed hosting platform.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        Platform name or None
    """
    return parsed_data.get('managed_hosting', None)
155
+
156
+
157
def has_waf(parsed_data: Dict[str, Any]) -> bool:
    """
    Check if WAF is detected.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        True if WAF detected
    """
    wafs = parsed_data.get('waf', [])
    return len(wafs) > 0
168
+
169
+
170
def get_wafs(parsed_data: Dict[str, Any]) -> List[str]:
    """
    Get list of detected WAFs.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        List of WAF names
    """
    try:
        return parsed_data['waf']
    except KeyError:
        return []
181
+
182
+
183
def has_cdn(parsed_data: Dict[str, Any]) -> bool:
    """
    Check if CDN is detected.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        True if CDN detected
    """
    cdns = parsed_data.get('cdn', [])
    return len(cdns) > 0
194
+
195
+
196
def get_cdns(parsed_data: Dict[str, Any]) -> List[str]:
    """
    Get list of detected CDNs.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        List of CDN names
    """
    try:
        return parsed_data['cdn']
    except KeyError:
        return []
207
+
208
+
209
def build_fingerprint_context(parsed_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build context dict for use in tool chaining.

    Downstream tools receive this context so they can make smarter
    decisions (e.g. skip checks behind a WAF or managed host).

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        Context dict for tool chaining
    """
    fingerprint = {
        'managed_hosting': parsed_data.get('managed_hosting'),
        'waf': parsed_data.get('waf', []),
        'cdn': parsed_data.get('cdn', []),
        'server': parsed_data.get('server'),
        'technologies': parsed_data.get('technologies', []),
        'status_code': parsed_data.get('status_code'),
    }
    return {'http_fingerprint': fingerprint}
232
+
233
+
234
def get_tool_recommendations(parsed_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Get recommendations for tool configuration based on fingerprint.

    Args:
        parsed_data: Output from parse_http_fingerprint_output()

    Returns:
        Dict keyed by tool name ('nikto', 'nuclei', 'sqlmap') with
        per-tool tweaks, plus a 'general' section of free-form notes.
    """
    recommendations = {
        'nikto': {
            'skip_cgi': False,
            'extra_args': [],
            'reason': None,
        },
        'nuclei': {
            'extra_args': [],
            'skip_tags': [],
            'reason': None,
        },
        'sqlmap': {
            'tamper_scripts': [],
            'extra_args': [],
            'reason': None,
        },
        'general': {
            'notes': [],
        }
    }

    # Managed hosting recommendations
    if parsed_data.get('managed_hosting'):
        platform = parsed_data['managed_hosting']
        # CGI enumeration is pointless on managed platforms; trim nikto.
        recommendations['nikto']['skip_cgi'] = True
        recommendations['nikto']['extra_args'] = ['-C', 'none', '-Tuning', 'x6']
        recommendations['nikto']['reason'] = f"Managed hosting ({platform}) - CGI enumeration skipped"

        recommendations['general']['notes'].append(
            f"Target is hosted on {platform} - limited vulnerability surface expected"
        )

    # WAF recommendations
    wafs = parsed_data.get('waf', [])
    if wafs:
        waf_list = ', '.join(wafs)
        recommendations['general']['notes'].append(f"WAF detected: {waf_list}")

        # SQLMap tamper scripts for common WAFs
        for waf in wafs:
            waf_lower = waf.lower()
            if 'cloudflare' in waf_lower:
                recommendations['sqlmap']['tamper_scripts'].extend(['between', 'randomcase', 'space2comment'])
            elif 'akamai' in waf_lower:
                recommendations['sqlmap']['tamper_scripts'].extend(['charencode', 'space2plus'])
            elif 'imperva' in waf_lower or 'incapsula' in waf_lower:
                recommendations['sqlmap']['tamper_scripts'].extend(['randomcase', 'between'])

        if recommendations['sqlmap']['tamper_scripts']:
            # Dedupe while preserving first-seen order. The previous
            # list(set(...)) made the ordering nondeterministic, so the same
            # scan could yield a different sqlmap command line run-to-run.
            recommendations['sqlmap']['tamper_scripts'] = list(
                dict.fromkeys(recommendations['sqlmap']['tamper_scripts'])
            )
            recommendations['sqlmap']['reason'] = f"WAF bypass tamper scripts for {waf_list}"

    # CDN recommendations
    cdns = parsed_data.get('cdn', [])
    if cdns:
        cdn_list = ', '.join(cdns)
        recommendations['general']['notes'].append(
            f"CDN detected: {cdn_list} - responses may be cached, hitting edge not origin"
        )

    return recommendations
306
+
307
+
308
# Public API of the http_fingerprint parser module.
__all__ = [
    'parse_http_fingerprint_output',
    'is_managed_hosting',
    'get_managed_hosting_platform',
    'has_waf',
    'get_wafs',
    'has_cdn',
    'get_cdns',
    'build_fingerprint_context',
    'get_tool_recommendations',
]
@@ -85,13 +85,24 @@ def parse_hydra_output(output: str, target: str = "") -> Dict[str, Any]:
85
85
  'password': attempt_match.group(3)
86
86
  }
87
87
 
88
- # Parse successful login lines
89
- # Format: [PORT][SERVICE] host: HOST login: USER password: PASS
88
+ # Parse successful login lines with multiple format support
89
+ # Format 1: [PORT][SERVICE] host: HOST login: USER password: PASS
90
+ # Format 2: [PORT][SERVICE] host: HOST login: USER password: PASS (single space)
91
+ # Format 3: [SERVICE][PORT] host: HOST login: USER password: PASS (swapped)
92
+ # Format 4: [PORT][SERVICE] HOST login: USER password: PASS (no "host:")
93
+
94
+ login_match = None
95
+ port = None
96
+ service = None
97
+ host = None
98
+ username = None
99
+ password = None
100
+
101
+ # Try standard format: [PORT][SERVICE] host: HOST login: USER password: PASS
90
102
  login_match = re.search(
91
- r'\[(\d+)\]\[([\w-]+)\]\s+host:\s+(\S+)\s+login:\s+(\S+)\s+password:\s+(.+)',
92
- line_stripped
103
+ r'\[(\d+)\]\[([\w-]+)\]\s+host:\s*(\S+)\s+login:\s*(\S+)\s+password:\s*(.+)',
104
+ line_stripped, re.IGNORECASE
93
105
  )
94
-
95
106
  if login_match:
96
107
  port = int(login_match.group(1))
97
108
  service = login_match.group(2).lower()
@@ -99,6 +110,46 @@ def parse_hydra_output(output: str, target: str = "") -> Dict[str, Any]:
99
110
  username = login_match.group(4)
100
111
  password = login_match.group(5).strip()
101
112
 
113
+ # Try swapped format: [SERVICE][PORT]
114
+ if not login_match:
115
+ login_match = re.search(
116
+ r'\[([\w-]+)\]\[(\d+)\]\s+host:\s*(\S+)\s+login:\s*(\S+)\s+password:\s*(.+)',
117
+ line_stripped, re.IGNORECASE
118
+ )
119
+ if login_match:
120
+ service = login_match.group(1).lower()
121
+ port = int(login_match.group(2))
122
+ host = login_match.group(3)
123
+ username = login_match.group(4)
124
+ password = login_match.group(5).strip()
125
+
126
+ # Try format without "host:" label
127
+ if not login_match:
128
+ login_match = re.search(
129
+ r'\[(\d+)\]\[([\w-]+)\]\s+(\d+\.\d+\.\d+\.\d+|\S+)\s+login:\s*(\S+)\s+password:\s*(.+)',
130
+ line_stripped, re.IGNORECASE
131
+ )
132
+ if login_match:
133
+ port = int(login_match.group(1))
134
+ service = login_match.group(2).lower()
135
+ host = login_match.group(3)
136
+ username = login_match.group(4)
137
+ password = login_match.group(5).strip()
138
+
139
+ # Try flexible format with any whitespace between fields
140
+ if not login_match:
141
+ login_match = re.search(
142
+ r'\[(\d+)\]\[([\w-]+)\].*?(?:host:?\s*)?(\d+\.\d+\.\d+\.\d+|\S+\.\S+).*?login:?\s*(\S+).*?password:?\s*(.+)',
143
+ line_stripped, re.IGNORECASE
144
+ )
145
+ if login_match:
146
+ port = int(login_match.group(1))
147
+ service = login_match.group(2).lower()
148
+ host = login_match.group(3)
149
+ username = login_match.group(4)
150
+ password = login_match.group(5).strip()
151
+
152
+ if login_match and port and service and username:
102
153
  # Store service info if not already set
103
154
  if not result['service']:
104
155
  result['service'] = service