souleyez 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,615 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Engagement scope validation for scan targets.
4
+
5
+ This module provides validation of targets against engagement scope definitions
6
+ to prevent scanning unauthorized targets.
7
+ """
8
+ import re
9
+ import ipaddress
10
+ import fnmatch
11
+ from dataclasses import dataclass
12
+ from typing import Dict, Any, List, Optional
13
+ from urllib.parse import urlparse
14
+
15
+ from souleyez.storage.database import get_db
16
+ from souleyez.log_config import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
class ScopeViolationError(Exception):
    """Error type for a target that falls outside the engagement scope.

    Meant to be raised when a target is out of scope and the engagement's
    enforcement mode is 'block'.
    """
24
+
25
+
26
@dataclass
class ScopeValidationResult:
    """Outcome of checking a single target against an engagement's scope."""

    # True when the target may be used, False when it must be treated as out of scope.
    is_in_scope: bool
    # The scope entry row that decided the outcome, or None when no entry matched.
    matched_entry: Optional[Dict[str, Any]]
    # Human-readable explanation of the decision.
    reason: str
    # Type of the matched entry: 'cidr', 'domain', 'url', 'hostname', or None.
    scope_type: Optional[str]
33
+
34
+
35
class ScopeValidator:
    """
    Validates targets against engagement scope definitions.

    Exclusion entries are evaluated before inclusion entries, so a deny rule
    always wins. An engagement without any inclusion entry is treated as
    having no scope, and every target is then reported as in scope.

    Usage:
        validator = ScopeValidator(engagement_id)
        result = validator.validate_target("192.168.1.100")
        if not result.is_in_scope:
            print(f"Out of scope: {result.reason}")
    """

    def __init__(self, engagement_id: int):
        """
        Initialize validator for an engagement.

        Args:
            engagement_id: The engagement to validate against
        """
        self.engagement_id = engagement_id
        self.db = get_db()
        # Both caches are filled lazily on first successful read.
        self._scope_cache: Optional[List[Dict[str, Any]]] = None
        self._enforcement_cache: Optional[str] = None

    def get_scope_entries(self) -> List[Dict[str, Any]]:
        """
        Get all scope entries for this engagement.

        The result of the first successful query is cached on the instance.
        A failed query is logged and yields an empty list without caching,
        so the next call retries.

        Returns:
            List of scope entry dicts with: id, scope_type, value, is_excluded, description
        """
        if self._scope_cache is not None:
            return self._scope_cache

        try:
            entries = self.db.execute(
                """SELECT id, scope_type, value, is_excluded, description
                   FROM engagement_scope
                   WHERE engagement_id = ?
                   ORDER BY is_excluded ASC, scope_type ASC""",
                (self.engagement_id,)
            )
            self._scope_cache = entries
            return entries
        except Exception as e:
            # NOTE(review): returning [] here makes validate_target() permissive
            # (no scope defined) while the database is unavailable - confirm
            # that failing open is the intended behaviour.
            logger.warning("Failed to get scope entries", extra={
                "engagement_id": self.engagement_id,
                "error": str(e)
            })
            return []

    def has_scope_defined(self) -> bool:
        """
        Check if the engagement has any scope entries defined.

        Only inclusion entries count; an engagement that has nothing but
        exclusions is considered to have no scope.

        Returns:
            True if scope is defined, False otherwise
        """
        return any(not e.get('is_excluded') for e in self.get_scope_entries())

    def get_enforcement_mode(self) -> str:
        """
        Get the enforcement mode for this engagement.

        Falls back to 'off' when the engagement row or the column value is
        missing, and when the lookup fails (the failure is logged).

        Returns:
            'off', 'warn', or 'block'
        """
        if self._enforcement_cache is not None:
            return self._enforcement_cache

        try:
            result = self.db.execute_one(
                "SELECT scope_enforcement FROM engagements WHERE id = ?",
                (self.engagement_id,)
            )
            mode = result.get('scope_enforcement', 'off') if result else 'off'
            self._enforcement_cache = mode or 'off'
            return self._enforcement_cache
        except Exception as e:
            logger.warning("Failed to get enforcement mode", extra={
                "engagement_id": self.engagement_id,
                "error": str(e)
            })
            return 'off'

    def validate_target(self, target: str) -> "ScopeValidationResult":
        """
        Validate a target against the engagement scope.

        Handles:
        - URLs (extracts host for validation)
        - IP addresses
        - CIDR ranges
        - Hostnames/domains

        Args:
            target: The target to validate (IP, URL, hostname, etc.)

        Returns:
            ScopeValidationResult with validation outcome
        """
        if not target or not target.strip():
            return ScopeValidationResult(
                is_in_scope=False,
                matched_entry=None,
                reason="Empty target",
                scope_type=None
            )

        target = target.strip()

        # If no scope defined, everything is in scope (permissive default)
        if not self.has_scope_defined():
            return ScopeValidationResult(
                is_in_scope=True,
                matched_entry=None,
                reason="No scope defined (permissive)",
                scope_type=None
            )

        target_type, normalized = self._parse_target(target)
        entries = self.get_scope_entries()

        # Exclusions first: deny rules take precedence over any inclusion.
        for entry in entries:
            if entry.get('is_excluded') and self._matches_entry(normalized, target_type, entry):
                return ScopeValidationResult(
                    is_in_scope=False,
                    matched_entry=entry,
                    reason=f"Explicitly excluded by scope entry: {entry['value']}",
                    scope_type=entry['scope_type']
                )

        for entry in entries:
            if not entry.get('is_excluded') and self._matches_entry(normalized, target_type, entry):
                return ScopeValidationResult(
                    is_in_scope=True,
                    matched_entry=entry,
                    reason=f"Matched scope entry: {entry['value']}",
                    scope_type=entry['scope_type']
                )

        # No match found - out of scope
        return ScopeValidationResult(
            is_in_scope=False,
            matched_entry=None,
            reason=f"Target '{target}' does not match any scope entry",
            scope_type=None
        )

    def validate_ip(self, ip: str) -> "ScopeValidationResult":
        """
        Validate an IP address against scope.

        Args:
            ip: IP address string

        Returns:
            ScopeValidationResult
        """
        return self.validate_target(ip)

    def validate_domain(self, domain: str) -> "ScopeValidationResult":
        """
        Validate a domain against scope.

        Args:
            domain: Domain name

        Returns:
            ScopeValidationResult
        """
        return self.validate_target(domain)

    def validate_url(self, url: str) -> "ScopeValidationResult":
        """
        Validate a URL against scope.

        Args:
            url: URL string

        Returns:
            ScopeValidationResult
        """
        return self.validate_target(url)

    def log_validation(self, target: str, result: "ScopeValidationResult",
                       action: str, job_id: Optional[int] = None) -> None:
        """
        Log a validation result to the audit trail.

        Logging is best-effort: a failure to resolve the current user or to
        write the row is swallowed (the latter with a warning) so that it
        never interrupts the caller.

        Args:
            target: The target that was validated
            result: The validation result
            action: Action taken ('allowed', 'blocked', 'warned')
            job_id: Optional job ID associated with this validation
        """
        try:
            from souleyez.auth import get_current_user
            user = get_current_user()
            user_id = user.id if user else None
        except Exception:
            # The audit row is still written, just without a user.
            user_id = None

        validation_result = 'in_scope' if result.is_in_scope else 'out_of_scope'
        if not self.has_scope_defined():
            validation_result = 'no_scope_defined'

        try:
            self.db.insert('scope_validation_log', {
                'engagement_id': self.engagement_id,
                'job_id': job_id,
                'target': target,
                'validation_result': validation_result,
                'action_taken': action,
                'matched_scope_id': result.matched_entry.get('id') if result.matched_entry else None,
                'user_id': user_id
            })
        except Exception as e:
            logger.warning("Failed to log scope validation", extra={
                "engagement_id": self.engagement_id,
                "target": target,
                "error": str(e)
            })

    @staticmethod
    def _url_host(url: str) -> str:
        """
        Return the lowercase host of a URL, or '' when it has none.

        Uses the parsed ``hostname`` rather than splitting ``netloc`` on ':'
        so that userinfo ("user:pw@host"), ports and bracketed IPv6 literals
        are handled. A URL that cannot be parsed also yields ''.
        """
        try:
            return urlparse(url).hostname or ''
        except ValueError:
            return ''

    @staticmethod
    def _same_ip(ip: str, candidate: str) -> bool:
        """Return True when both strings are IP addresses with the same value."""
        try:
            return ipaddress.ip_address(ip) == ipaddress.ip_address(candidate)
        except ValueError:
            # candidate is a hostname (or garbage), which never equals an IP.
            return False

    def _parse_target(self, target: str) -> tuple:
        """
        Parse target to determine type and normalize.

        URLs are reduced to their host. IP addresses are returned in their
        canonical textual form, and domains are lowercased.

        Returns:
            (target_type, normalized_value)
            target_type is one of: 'ip', 'cidr', 'domain'
        """
        # URL schemes are case-insensitive, so compare on a lowered copy.
        if target.lower().startswith(('http://', 'https://')):
            host = self._url_host(target)
            try:
                return ('ip', str(ipaddress.ip_address(host)))
            except ValueError:
                return ('domain', host)

        try:
            return ('ip', str(ipaddress.ip_address(target)))
        except ValueError:
            pass

        if '/' in target:
            try:
                ipaddress.ip_network(target, strict=False)
                return ('cidr', target)
            except ValueError:
                pass

        # Assume domain/hostname
        return ('domain', target.lower())

    def _matches_entry(self, target: str, target_type: str, entry: Dict[str, Any]) -> bool:
        """
        Check if a target matches a scope entry.

        A CIDR target matches an inclusion only when it lies entirely inside
        the entry's network, but matches an exclusion as soon as the two
        networks overlap.

        Args:
            target: Normalized target value
            target_type: Type of target ('ip', 'cidr', 'domain')
            entry: Scope entry dict

        Returns:
            True if matches, False otherwise
        """
        if not target:
            # An empty host (e.g. from an unparsable URL) must never match.
            return False

        entry_type = entry['scope_type']
        # Tolerate stored values that carry stray whitespace.
        entry_value = (entry['value'] or '').strip()

        if target_type == 'ip':
            if entry_type == 'cidr':
                return self._ip_in_cidr(target, entry_value)
            if entry_type == 'hostname':
                return self._same_ip(target, entry_value)
            if entry_type == 'url':
                return self._same_ip(target, self._url_host(entry_value))
            # An IP never matches a domain pattern.
            return False

        if target_type == 'cidr':
            if entry_type != 'cidr':
                return False
            if entry.get('is_excluded'):
                return self._cidr_overlaps(target, entry_value)
            return self._cidr_within(target, entry_value)

        if target_type == 'domain':
            if entry_type == 'domain':
                return self._domain_matches(target, entry_value)
            if entry_type == 'hostname':
                return target.lower() == entry_value.lower()
            if entry_type == 'url':
                entry_host = self._url_host(entry_value)
                if not entry_host:
                    return False
                return target == entry_host or self._domain_matches(target, entry_host)
            return False

        return False

    def _ip_in_cidr(self, ip: str, cidr: str) -> bool:
        """Check if IP is within CIDR range; invalid input yields False."""
        try:
            return ipaddress.ip_address(ip) in ipaddress.ip_network(cidr, strict=False)
        except ValueError:
            return False

    def _cidr_overlaps(self, cidr1: str, cidr2: str) -> bool:
        """Check if two CIDR ranges overlap; invalid input yields False."""
        try:
            net1 = ipaddress.ip_network(cidr1, strict=False)
            net2 = ipaddress.ip_network(cidr2, strict=False)
            return net1.overlaps(net2)
        except ValueError:
            return False

    def _cidr_within(self, inner: str, outer: str) -> bool:
        """Check if the network `inner` lies entirely inside `outer`."""
        try:
            inner_net = ipaddress.ip_network(inner, strict=False)
            outer_net = ipaddress.ip_network(outer, strict=False)
        except ValueError:
            return False
        # subnet_of() raises TypeError when IPv4 and IPv6 are mixed.
        if inner_net.version != outer_net.version:
            return False
        return inner_net.subnet_of(outer_net)

    def _domain_matches(self, target: str, pattern: str) -> bool:
        """
        Check if domain matches a pattern.

        Supports wildcards:
        - *.example.com matches example.com, sub.example.com, deep.sub.example.com
        - example.com matches example.com only
        - '*' or '?' elsewhere in the pattern are matched with fnmatch

        Args:
            target: Target domain
            pattern: Pattern to match against

        Returns:
            True if matches
        """
        pattern = pattern.lower()
        target = target.lower()

        if pattern.startswith('*.'):
            # The bare suffix itself is accepted as well as any subdomain.
            suffix = pattern[2:]
            return target == suffix or target.endswith('.' + suffix)

        if '*' in pattern or '?' in pattern:
            return fnmatch.fnmatch(target, pattern)

        return target == pattern
415
+
416
+
417
class ScopeManager:
    """
    Manages scope definitions for engagements.

    Usage:
        manager = ScopeManager()
        manager.add_scope(engagement_id, 'cidr', '192.168.1.0/24')
        manager.add_scope(engagement_id, 'domain', '*.example.com')
        manager.set_enforcement(engagement_id, 'warn')
    """

    # Dot-separated labels of letters, digits and inner hyphens; shared by the
    # 'hostname' and 'domain' validators.
    _HOSTNAME_RE = re.compile(
        r'^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$'
    )

    def __init__(self):
        self.db = get_db()

    def add_scope(self, engagement_id: int, scope_type: str, value: str,
                  is_excluded: bool = False, description: Optional[str] = None) -> int:
        """
        Add a scope entry for an engagement.

        Surrounding whitespace is removed from the value before it is
        validated and stored, so that the stored entry is exactly the text
        that passed validation.

        Args:
            engagement_id: Engagement ID
            scope_type: Type of scope ('cidr', 'domain', 'url', 'hostname')
            value: Scope value (e.g., '192.168.1.0/24', '*.example.com')
            is_excluded: If True, this is an exclusion (deny rule)
            description: Optional description

        Returns:
            ID of created scope entry

        Raises:
            ValueError: If scope_type or value is invalid
        """
        valid_types = ['cidr', 'domain', 'url', 'hostname']
        if scope_type not in valid_types:
            raise ValueError(f"Invalid scope_type: {scope_type}. Must be one of: {valid_types}")

        if isinstance(value, str):
            value = value.strip()
        self._validate_scope_value(scope_type, value)

        try:
            from souleyez.auth import get_current_user
            user = get_current_user()
            added_by = user.id if user else None
        except Exception:
            # Attribution is best-effort; the entry is stored without it.
            added_by = None

        return self.db.insert('engagement_scope', {
            'engagement_id': engagement_id,
            'scope_type': scope_type,
            'value': value,
            'is_excluded': is_excluded,
            'description': description,
            'added_by': added_by
        })

    def remove_scope(self, scope_id: int) -> bool:
        """
        Remove a scope entry by ID.

        Args:
            scope_id: ID of scope entry to remove

        Returns:
            True if the DELETE statement ran without error (whether or not a
            row with that ID existed), False if it raised
        """
        try:
            self.db.execute(
                "DELETE FROM engagement_scope WHERE id = ?",
                (scope_id,)
            )
            return True
        except Exception as e:
            logger.warning("Failed to remove scope entry", extra={
                "scope_id": scope_id,
                "error": str(e)
            })
            return False

    def list_scope(self, engagement_id: int) -> List[Dict[str, Any]]:
        """
        List all scope entries for an engagement.

        Args:
            engagement_id: Engagement ID

        Returns:
            List of scope entry dicts, inclusions before exclusions
        """
        return self.db.execute(
            """SELECT id, scope_type, value, is_excluded, description, added_by, created_at
               FROM engagement_scope
               WHERE engagement_id = ?
               ORDER BY is_excluded ASC, scope_type ASC, value ASC""",
            (engagement_id,)
        )

    def set_enforcement(self, engagement_id: int, mode: str) -> bool:
        """
        Set enforcement mode for an engagement.

        Args:
            engagement_id: Engagement ID
            mode: 'off', 'warn', or 'block'

        Returns:
            True if the UPDATE ran without error, False if it raised

        Raises:
            ValueError: If mode is invalid
        """
        valid_modes = ['off', 'warn', 'block']
        if mode not in valid_modes:
            raise ValueError(f"Invalid enforcement mode: {mode}. Must be one of: {valid_modes}")

        try:
            self.db.execute(
                "UPDATE engagements SET scope_enforcement = ? WHERE id = ?",
                (mode, engagement_id)
            )
            return True
        except Exception as e:
            logger.warning("Failed to set enforcement mode", extra={
                "engagement_id": engagement_id,
                "mode": mode,
                "error": str(e)
            })
            return False

    def get_validation_log(self, engagement_id: int, limit: int = 100) -> List[Dict[str, Any]]:
        """
        Get scope validation log for an engagement.

        Args:
            engagement_id: Engagement ID
            limit: Maximum entries to return

        Returns:
            List of validation log entries, newest first
        """
        return self.db.execute(
            """SELECT id, job_id, target, validation_result, action_taken,
                      matched_scope_id, user_id, created_at
               FROM scope_validation_log
               WHERE engagement_id = ?
               ORDER BY created_at DESC
               LIMIT ?""",
            (engagement_id, limit)
        )

    def _validate_scope_value(self, scope_type: str, value: str) -> None:
        """
        Validate scope value based on type.

        Raises:
            ValueError: If value is invalid for the scope type
        """
        if not value or not value.strip():
            raise ValueError("Scope value cannot be empty")

        value = value.strip()

        if scope_type == 'cidr':
            try:
                ipaddress.ip_network(value, strict=False)
            except ValueError as exc:
                raise ValueError(f"Invalid CIDR notation: {value}") from exc

        elif scope_type == 'hostname':
            # A 'hostname' entry may be a literal IP address or a host name.
            try:
                ipaddress.ip_address(value)
            except ValueError:
                if len(value) > 253:
                    raise ValueError("Hostname too long (max 253 characters)")
                if not self._HOSTNAME_RE.match(value):
                    raise ValueError(f"Invalid hostname format: {value}")

        elif scope_type == 'domain':
            # Allow a leading wildcard label such as *.example.com
            domain_part = value[2:] if value.startswith('*.') else value

            if len(domain_part) > 253:
                raise ValueError("Domain too long (max 253 characters)")
            if not self._HOSTNAME_RE.match(domain_part):
                raise ValueError(f"Invalid domain format: {value}")

        elif scope_type == 'url':
            if not value.startswith(('http://', 'https://')):
                raise ValueError("URL must start with http:// or https://")
            try:
                host = urlparse(value).hostname
            except ValueError as exc:
                raise ValueError(f"Invalid URL format: {value}") from exc
            # Checked outside the try so this specific message reaches the caller.
            if not host:
                raise ValueError(f"Invalid URL (no host): {value}")