souleyez-2.22.0-py3-none-any.whl → souleyez-2.27.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of souleyez might be problematic. Click here for more details.

Files changed (37)
  1. souleyez/__init__.py +1 -1
  2. souleyez/assets/__init__.py +1 -0
  3. souleyez/assets/souleyez-icon.png +0 -0
  4. souleyez/core/msf_sync_manager.py +15 -5
  5. souleyez/core/tool_chaining.py +126 -26
  6. souleyez/detection/validator.py +4 -2
  7. souleyez/docs/README.md +2 -2
  8. souleyez/docs/user-guide/configuration.md +1 -1
  9. souleyez/docs/user-guide/installation.md +14 -1
  10. souleyez/engine/background.py +620 -154
  11. souleyez/engine/result_handler.py +262 -1
  12. souleyez/engine/worker_manager.py +98 -2
  13. souleyez/main.py +103 -4
  14. souleyez/parsers/crackmapexec_parser.py +101 -43
  15. souleyez/parsers/dnsrecon_parser.py +50 -35
  16. souleyez/parsers/enum4linux_parser.py +101 -21
  17. souleyez/parsers/http_fingerprint_parser.py +319 -0
  18. souleyez/parsers/hydra_parser.py +56 -5
  19. souleyez/parsers/impacket_parser.py +123 -44
  20. souleyez/parsers/john_parser.py +47 -14
  21. souleyez/parsers/msf_parser.py +20 -5
  22. souleyez/parsers/nmap_parser.py +48 -27
  23. souleyez/parsers/smbmap_parser.py +39 -23
  24. souleyez/parsers/sqlmap_parser.py +18 -9
  25. souleyez/parsers/theharvester_parser.py +21 -13
  26. souleyez/plugins/http_fingerprint.py +598 -0
  27. souleyez/plugins/nuclei.py +41 -17
  28. souleyez/ui/interactive.py +99 -7
  29. souleyez/ui/setup_wizard.py +93 -5
  30. souleyez/ui/tool_setup.py +52 -52
  31. souleyez/utils/tool_checker.py +45 -5
  32. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/METADATA +16 -3
  33. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/RECORD +37 -33
  34. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/WHEEL +0 -0
  35. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/entry_points.txt +0 -0
  36. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/licenses/LICENSE +0 -0
  37. {souleyez-2.22.0.dist-info → souleyez-2.27.0.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,16 @@ from typing import Dict, Any
7
7
 
8
8
 
9
9
  def strip_ansi_codes(text: str) -> str:
10
- """Remove ANSI escape codes from text."""
11
- ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
12
- return ansi_escape.sub('', text)
10
+ """Remove ANSI escape codes and other terminal control sequences from text."""
11
+ # Pattern 1: Standard ANSI escape sequences
12
+ text = re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
13
+ # Pattern 2: OSC sequences (Operating System Command)
14
+ text = re.sub(r'\x1B\].*?\x07', '', text)
15
+ # Pattern 3: Simple color codes
16
+ text = re.sub(r'\x1b\[[0-9;]*m', '', text)
17
+ # Pattern 4: Carriage returns and other control chars (except newlines)
18
+ text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)
19
+ return text
13
20
 
14
21
 
15
22
  def parse_msf_ssh_version(output: str, target: str) -> Dict[str, Any]:
@@ -254,6 +261,7 @@ def parse_msf_login_success(output: str, target: str, module: str) -> Dict[str,
254
261
  seen_creds = set() # Avoid duplicates
255
262
 
256
263
  # Pattern 1: [+] 10.0.0.82:22 - Success: 'username:password' 'additional info'
264
+ # Also handles: [+] 10.0.0.82:22 - Success: "username:password"
257
265
  success_pattern1 = r'\[\+\]\s+[\d.]+:(\d+)\s+-\s+Success:\s+[\'"]([^:]+):([^\'\"]+)[\'"]'
258
266
 
259
267
  # Pattern 2: [+] IP:PORT - IP:PORT - Login Successful: user:pass@database
@@ -268,6 +276,13 @@ def parse_msf_login_success(output: str, target: str, module: str) -> Dict[str,
268
276
  # MSF telnet_login uses "username:password login: Login OK" format
269
277
  success_pattern_telnet = r'\[\+\]\s+[\d.]+:(\d+).*-\s+([^:\s]+):([^\s]+)\s+login:\s+Login OK'
270
278
 
279
+ # Pattern 5: Flexible [+] with credentials anywhere (fallback)
280
+ # Handles: [+] 10.0.0.82:22 Found credentials: user:pass
281
+ success_pattern_flexible = r'\[\+\]\s+[\d.]+:(\d+).*(?:credential|found|valid).*?[\'"]?([^:\s\'\"]+):([^\'\"@\s]+)[\'"]?'
282
+
283
+ # Pattern 6: RDP format [+] 10.0.0.82:3389 - DOMAIN\user:password - Success
284
+ success_pattern_rdp = r'\[\+\]\s+[\d.]+:(\d+).*?([^\\:\s]+\\)?([^:\s]+):([^\s-]+)\s*-\s*Success'
285
+
271
286
  # Try pattern 3 first (VNC with empty username)
272
287
  for match in re.finditer(success_pattern3, clean_output):
273
288
  port = int(match.group(1))
@@ -295,8 +310,8 @@ def parse_msf_login_success(output: str, target: str, module: str) -> Dict[str,
295
310
  })
296
311
 
297
312
  # Try other patterns (username:password style)
298
- for pattern in [success_pattern1, success_pattern2, success_pattern_telnet]:
299
- for match in re.finditer(pattern, clean_output):
313
+ for pattern in [success_pattern1, success_pattern2, success_pattern_telnet, success_pattern_flexible]:
314
+ for match in re.finditer(pattern, clean_output, re.IGNORECASE):
300
315
  port = int(match.group(1))
301
316
  username = match.group(2)
302
317
  password = match.group(3)
@@ -449,34 +449,55 @@ def parse_nmap_text(output: str) -> Dict[str, Any]:
449
449
  version = None
450
450
 
451
451
  if raw_version:
452
- # Remove nmap metadata: "syn-ack ttl XX"
453
- cleaned = raw_version
454
- if cleaned.startswith('syn-ack'):
455
- parts_ver = cleaned.split()
456
- # Skip "syn-ack", "ttl", and the TTL number
457
- if 'ttl' in parts_ver:
458
- ttl_idx = parts_ver.index('ttl')
459
- cleaned = ' '.join(parts_ver[ttl_idx+2:]) # Skip "ttl XX"
460
- else:
461
- cleaned = ' '.join(parts_ver[1:]) # Skip "syn-ack"
462
-
463
- # Extract product and version
464
- # Pattern: "ProductName version.number rest of string"
465
- # Examples:
466
- # "ProFTPD 1.3.5" product="ProFTPD", version="1.3.5"
467
- # "Apache httpd 2.4.7 ((Ubuntu))" → product="Apache httpd", version="2.4.7"
468
- # "OpenSSH 6.6.1p1 Ubuntu 2ubuntu2.13" product="OpenSSH", version="6.6.1p1"
469
-
470
- version_pattern = r'([A-Za-z][\w\s\-\.]+?)\s+(v?\d+[\.\d]+[\w\-\.]*)'
471
- match = re.search(version_pattern, cleaned)
472
-
473
- if match:
474
- product = match.group(1).strip()
475
- version = match.group(2).strip()
476
- else:
477
- # Fallback: use cleaned string as version, service as product
452
+ try:
453
+ # Remove nmap metadata: "syn-ack ttl XX", "reset ttl XX", etc.
454
+ cleaned = raw_version
455
+ # Handle various nmap scan type prefixes
456
+ metadata_prefixes = ['syn-ack', 'reset', 'conn-refused', 'no-response']
457
+ for prefix in metadata_prefixes:
458
+ if cleaned.lower().startswith(prefix):
459
+ parts_ver = cleaned.split()
460
+ # Skip prefix and "ttl XX" if present
461
+ if len(parts_ver) > 1 and 'ttl' in parts_ver:
462
+ try:
463
+ ttl_idx = parts_ver.index('ttl')
464
+ cleaned = ' '.join(parts_ver[ttl_idx+2:]) # Skip "ttl XX"
465
+ except (ValueError, IndexError):
466
+ cleaned = ' '.join(parts_ver[1:]) # Skip just prefix
467
+ else:
468
+ cleaned = ' '.join(parts_ver[1:]) # Skip just prefix
469
+ break
470
+
471
+ # Extract product and version with multiple patterns
472
+ # Pattern: "ProductName version.number rest of string"
473
+ # Examples:
474
+ # "ProFTPD 1.3.5" → product="ProFTPD", version="1.3.5"
475
+ # "Apache httpd 2.4.7 ((Ubuntu))" → product="Apache httpd", version="2.4.7"
476
+ # "OpenSSH 6.6.1p1 Ubuntu 2ubuntu2.13" → product="OpenSSH", version="6.6.1p1"
477
+
478
+ version_patterns = [
479
+ r'([A-Za-z][\w\s\-\.]+?)\s+(v?\d+[\.\d]+[\w\-\.]*)', # Standard
480
+ r'^([A-Za-z][\w\-]+)\s+(\d[\w\.\-]+)', # ProductName vX.Y.Z
481
+ r'^([A-Za-z][\w\s]+?)\s+v?(\d+(?:\.\d+)+)', # "Product Name 1.2.3"
482
+ ]
483
+
484
+ matched = False
485
+ for pattern in version_patterns:
486
+ match = re.search(pattern, cleaned)
487
+ if match:
488
+ product = match.group(1).strip()
489
+ version = match.group(2).strip()
490
+ matched = True
491
+ break
492
+
493
+ if not matched:
494
+ # Fallback: use cleaned string as version, service as product
495
+ product = service_name
496
+ version = cleaned.strip() if cleaned.strip() else None
497
+ except Exception:
498
+ # If version parsing fails, use raw values
478
499
  product = service_name
479
- version = cleaned if cleaned else None
500
+ version = raw_version
480
501
 
481
502
  # Fallback: If service is unknown but port is a common web port, assume HTTP
482
503
  # This handles cases where nmap misidentifies or can't fingerprint web apps
@@ -92,9 +92,11 @@ def parse_smbmap_output(output: str, target: str = "") -> Dict[str, Any]:
92
92
  current_share = None
93
93
 
94
94
  for i, line in enumerate(lines):
95
- # Remove ANSI color codes and control characters
96
- line = re.sub(r'\x1b\[[0-9;]*m', '', line)
97
- line = re.sub(r'[\[\]\|/\\-]', '', line, count=1) # Remove progress indicators
95
+ # Remove ANSI color codes and control characters more thoroughly
96
+ line = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', line) # All ANSI escape sequences
97
+ line = re.sub(r'\x1b\].*?\x07', '', line) # OSC sequences
98
+ # Only remove leading progress indicators, not all brackets
99
+ line = re.sub(r'^[\[\]\|/\\-]+\s*', '', line)
98
100
  line = line.strip()
99
101
 
100
102
  # Extract target and status
@@ -126,29 +128,43 @@ def parse_smbmap_output(output: str, target: str = "") -> Dict[str, Any]:
126
128
  # Format: sharename <tabs/spaces> permissions <tabs/spaces> comment
127
129
  # tmp READ, WRITE oh noes!
128
130
 
131
+ share_name = None
132
+ permissions = None
133
+ comment = ''
134
+
129
135
  # Try tab split first
130
136
  parts = re.split(r'\t+', line)
131
- if len(parts) >= 3:
132
- # Tab-separated format
133
- share_name = parts[0].strip()
134
- permissions = parts[1].strip()
135
- comment = parts[2].strip() if len(parts) > 2 else ''
136
- elif len(parts) == 2:
137
- # Only 2 parts (share + permissions, no comment)
137
+ if len(parts) >= 2:
138
138
  share_name = parts[0].strip()
139
- permissions = parts[1].strip()
140
- comment = ''
141
- else:
142
- # No tabs - try space-based parsing
143
- # Match pattern: SHARENAME (spaces) PERMISSIONS (spaces) COMMENT
144
- # Need at least 2+ spaces to separate fields
145
- match = re.match(r'^\s*(\S+)\s{2,}(READ, WRITE|NO ACCESS|READ|WRITE)(?:\s{2,}(.*))?$', line)
146
- if match:
147
- share_name = match.group(1).strip()
148
- permissions = match.group(2).strip()
149
- comment = match.group(3).strip() if match.group(3) else ''
150
- else:
151
- continue
139
+ # Find permissions in remaining parts
140
+ for p in parts[1:]:
141
+ p = p.strip().upper()
142
+ if any(x in p for x in ['READ', 'WRITE', 'NO ACCESS', 'NOACCESS']):
143
+ permissions = p
144
+ break
145
+ # Comment is everything after permissions
146
+ if permissions and len(parts) > 2:
147
+ perm_idx = next((i for i, p in enumerate(parts) if permissions in p.upper()), -1)
148
+ if perm_idx >= 0 and perm_idx + 1 < len(parts):
149
+ comment = ' '.join(parts[perm_idx + 1:]).strip()
150
+
151
+ # No tabs or tab parse failed - try space-based parsing
152
+ if not permissions:
153
+ # Match patterns with flexible spacing and permission variations
154
+ permission_patterns = [
155
+ r'^\s*(\S+)\s{2,}(READ,?\s*WRITE|READ\s*ONLY|WRITE\s*ONLY|NO\s*ACCESS|READ|WRITE)(?:\s{2,}(.*))?$',
156
+ r'^\s*(\S+)\s+(READ,?\s*WRITE|READ\s*ONLY|WRITE\s*ONLY|NO\s*ACCESS|READ|WRITE)\s*(.*)$',
157
+ ]
158
+ for pattern in permission_patterns:
159
+ match = re.match(pattern, line, re.IGNORECASE)
160
+ if match:
161
+ share_name = match.group(1).strip()
162
+ permissions = match.group(2).strip().upper()
163
+ comment = match.group(3).strip() if match.group(3) else ''
164
+ break
165
+
166
+ if not share_name or not permissions:
167
+ continue
152
168
 
153
169
  # Skip empty lines or non-share lines
154
170
  if not share_name or share_name in ['Disk', 'IPC', '', '*']:
@@ -89,11 +89,13 @@ def parse_sqlmap_output(output: str, target: str = "") -> Dict[str, Any]:
89
89
  line = line.strip()
90
90
 
91
91
  # Extract URL being tested (GET requests typically)
92
- if 'testing URL' in line:
93
- url_match = re.search(r"testing URL '([^']+)'", line)
92
+ # Format variations: "testing URL 'http://...'" or 'testing URL "http://..."' or testing URL http://...
93
+ if 'testing URL' in line or 'testing url' in line.lower():
94
+ # Try single quotes first
95
+ url_match = re.search(r"testing URL ['\"]?([^'\"]+)['\"]?", line, re.IGNORECASE)
94
96
  if url_match:
95
- current_url = url_match.group(1)
96
- if current_url not in result['urls_tested']:
97
+ current_url = url_match.group(1).strip()
98
+ if current_url and current_url not in result['urls_tested']:
97
99
  result['urls_tested'].append(current_url)
98
100
 
99
101
  # Extract POST/GET URLs from form testing (crawl mode)
@@ -184,12 +186,19 @@ def parse_sqlmap_output(output: str, target: str = "") -> Dict[str, Any]:
184
186
  if next_line.startswith('[') or next_line.startswith('back-end'):
185
187
  break
186
188
 
187
- # Extract DBMS type
188
- if 'back-end DBMS:' in line:
189
- # Pattern: "back-end DBMS: MySQL >= 5.0.12"
190
- dbms_match = re.search(r"back-end DBMS:\s*([^\s]+)", line)
189
+ # Extract DBMS type with full version info
190
+ # Format variations:
191
+ # "back-end DBMS: MySQL >= 5.0.12"
192
+ # "back-end DBMS: Microsoft SQL Server 2019"
193
+ # "back-end DBMS: PostgreSQL"
194
+ if 'back-end DBMS:' in line or 'back-end dbms:' in line.lower():
195
+ dbms_match = re.search(r"back-end DBMS:\s*(.+)", line, re.IGNORECASE)
191
196
  if dbms_match and not result['dbms']:
192
- result['dbms'] = dbms_match.group(1)
197
+ dbms_full = dbms_match.group(1).strip()
198
+ # Extract just the DBMS name for the main field (first word)
199
+ # but store full version in a separate field
200
+ result['dbms'] = dbms_full.split()[0] if dbms_full else None
201
+ result['dbms_full'] = dbms_full # Keep full string
193
202
 
194
203
  # Extract web server OS
195
204
  if 'web server operating system:' in line.lower():
@@ -67,18 +67,19 @@ def parse_theharvester_output(output: str, target: str = "") -> Dict[str, Any]:
67
67
  if target_match:
68
68
  result['target'] = target_match.group(1)
69
69
 
70
- # Detect section headers
71
- elif '[*] ASNS found:' in line or 'ASNs found:' in line:
70
+ # Detect section headers (case-insensitive, multiple format variations)
71
+ line_lower = line.lower()
72
+ if any(x in line_lower for x in ['asns found', 'asn found', 'autonomous system']):
72
73
  current_section = 'asns'
73
- elif '[*] Interesting Urls found:' in line or '[*] URLs found:' in line:
74
+ elif any(x in line_lower for x in ['urls found', 'interesting urls', 'url found']):
74
75
  current_section = 'urls'
75
- elif '[*] IPs found:' in line:
76
+ elif any(x in line_lower for x in ['ips found', 'ip found', 'ip addresses']):
76
77
  current_section = 'ips'
77
- elif '[*] Emails found:' in line or 'Email addresses found:' in line:
78
+ elif any(x in line_lower for x in ['emails found', 'email found', 'email addresses']):
78
79
  current_section = 'emails'
79
- elif '[*] Hosts found:' in line or 'Hosts found:' in line:
80
+ elif any(x in line_lower for x in ['hosts found', 'host found', 'subdomains found', 'subdomain found']):
80
81
  current_section = 'hosts'
81
- elif '[*] People found:' in line or '[*] No people found' in line:
82
+ elif any(x in line_lower for x in ['people found', 'no people found', 'linkedin']):
82
83
  current_section = 'people' # We'll skip this for now
83
84
 
84
85
  # Skip separator lines and empty lines
@@ -117,18 +118,25 @@ def parse_theharvester_output(output: str, target: str = "") -> Dict[str, Any]:
117
118
  elif current_section == 'emails':
118
119
  # Email format: user@domain
119
120
  if '@' in line and '.' in line:
120
- # Basic email validation
121
- if re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', line):
122
- if line not in result['emails']:
123
- result['emails'].append(line)
121
+ # More permissive email validation (supports international domains)
122
+ # Pattern allows: standard emails, plus-addressing, dots, underscores
123
+ email = line.strip().lower()
124
+ # Remove any leading/trailing brackets or quotes
125
+ email = re.sub(r'^[\[\(<\'\"]+|[\]\)>\'\"]$', '', email)
126
+ if re.match(r'^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$', email):
127
+ if email not in result['emails']:
128
+ result['emails'].append(email)
124
129
 
125
130
  elif current_section == 'hosts':
126
131
  # Host format: subdomain.domain.tld
127
132
  if '.' in line and not line.startswith('http'):
128
133
  # Clean and validate hostname
129
134
  host = line.strip().lower()
130
- # Basic validation: has at least one dot and no invalid chars
131
- if re.match(r'^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', host):
135
+ # Remove any leading/trailing brackets, quotes, or trailing dots
136
+ host = re.sub(r'^[\[\(<\'\"]+|[\]\)>\'\".]+$', '', host)
137
+ # More permissive validation: allows underscores (common in some hosts)
138
+ # and longer TLDs (some are 4+ chars)
139
+ if re.match(r'^[a-zA-Z0-9._-]+\.[a-zA-Z]{2,}$', host) and len(host) > 3:
132
140
  if host not in result['hosts']:
133
141
  result['hosts'].append(host)
134
142