superencryptx 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: superencryptx
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Scan a repo for secrets and encrypt/decrypt them in-place.
5
5
  Author: Superencrypt Contributors
6
6
  License: MIT License
@@ -46,8 +46,9 @@ Requires-Dist: cryptography>=41.0.0
46
46
  Dynamic: license-file
47
47
 
48
48
  # superencrypt
49
+ https://pypi.org/project/superencryptx/0.1.0/
49
50
 
50
- CLI to scan a repo for secrets (including env files), encrypt them in-place, and decrypt them later using a key.
51
+ CLI to scan a repo for secrets (including env files, Dockerfiles, compose files, and YAML/TOML/JSON/INI-style configs), encrypt them in-place, and decrypt them later using a key.
51
52
 
52
53
  ## Why
53
54
 
@@ -92,6 +93,13 @@ superencrypt decrypt --key-file .superencrypt.key
92
93
 
93
94
  # Scan only (no changes)
94
95
  superencrypt scan
96
+
97
+ # Scan a single file
98
+ superencrypt scan --file path/to/file
99
+
100
+ # Encrypt/decrypt a single file
101
+ superencrypt encrypt --file path/to/file
102
+ superencrypt decrypt --file path/to/file --key-file .superencrypt.key
95
103
  ```
96
104
 
97
105
  ## Pipeline example
@@ -108,6 +116,7 @@ superencrypt decrypt --key "$SUPERENCRYPT_KEY"
108
116
  - Use `scan` first to review matches.
109
117
 
110
118
  ## Development
119
+ https://pypi.org/project/superencryptx/0.1.0/
111
120
 
112
121
  ```bash
113
122
  python -m venv .venv
@@ -1,6 +1,7 @@
1
1
  # superencrypt
2
+ https://pypi.org/project/superencryptx/0.1.0/
2
3
 
3
- CLI to scan a repo for secrets (including env files), encrypt them in-place, and decrypt them later using a key.
4
+ CLI to scan a repo for secrets (including env files, Dockerfiles, compose files, and YAML/TOML/JSON/INI-style configs), encrypt them in-place, and decrypt them later using a key.
4
5
 
5
6
  ## Why
6
7
 
@@ -45,6 +46,13 @@ superencrypt decrypt --key-file .superencrypt.key
45
46
 
46
47
  # Scan only (no changes)
47
48
  superencrypt scan
49
+
50
+ # Scan a single file
51
+ superencrypt scan --file path/to/file
52
+
53
+ # Encrypt/decrypt a single file
54
+ superencrypt encrypt --file path/to/file
55
+ superencrypt decrypt --file path/to/file --key-file .superencrypt.key
48
56
  ```
49
57
 
50
58
  ## Pipeline example
@@ -61,6 +69,7 @@ superencrypt decrypt --key "$SUPERENCRYPT_KEY"
61
69
  - Use `scan` first to review matches.
62
70
 
63
71
  ## Development
72
+ https://pypi.org/project/superencryptx/0.1.0/
64
73
 
65
74
  ```bash
66
75
  python -m venv .venv
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superencryptx"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "Scan a repo for secrets and encrypt/decrypt them in-place."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from typing import List
7
7
 
8
8
  from .crypto import Crypto
9
- from .scanner import scan_repo, iter_repo_files
9
+ from .scanner import scan_repo, scan_path, iter_repo_files
10
10
  from .transform import encrypt_file, decrypt_file
11
11
 
12
12
 
@@ -26,8 +26,15 @@ def _write_key_file(path: Path, key: bytes) -> None:
26
26
 
27
27
 
28
28
  def cmd_scan(args: argparse.Namespace) -> int:
29
- root = Path(args.root).resolve()
30
- findings = scan_repo(root)
29
+ if args.file:
30
+ path = Path(args.file).resolve()
31
+ if not path.exists():
32
+ raise SystemExit(f"File not found: {path}")
33
+ findings = scan_path(path)
34
+ root = path.parent
35
+ else:
36
+ root = Path(args.root).resolve()
37
+ findings = scan_repo(root)
31
38
  if not findings:
32
39
  print("No secrets found.")
33
40
  return 0
@@ -50,10 +57,18 @@ def cmd_encrypt(args: argparse.Namespace) -> int:
50
57
  crypto = Crypto(key)
51
58
 
52
59
  changed_files: List[Path] = []
53
- for path in iter_repo_files(root):
60
+ if args.file:
61
+ path = Path(args.file).resolve()
62
+ if not path.exists():
63
+ raise SystemExit(f"File not found: {path}")
54
64
  result = encrypt_file(path, crypto)
55
65
  if result.changed:
56
66
  changed_files.append(result.path)
67
+ else:
68
+ for path in iter_repo_files(root):
69
+ result = encrypt_file(path, crypto)
70
+ if result.changed:
71
+ changed_files.append(result.path)
57
72
  if changed_files:
58
73
  print(f"Encrypted {len(changed_files)} files.")
59
74
  else:
@@ -67,10 +82,18 @@ def cmd_decrypt(args: argparse.Namespace) -> int:
67
82
  crypto = Crypto(key)
68
83
 
69
84
  changed_files: List[Path] = []
70
- for path in iter_repo_files(root):
85
+ if args.file:
86
+ path = Path(args.file).resolve()
87
+ if not path.exists():
88
+ raise SystemExit(f"File not found: {path}")
71
89
  result = decrypt_file(path, crypto)
72
90
  if result.changed:
73
91
  changed_files.append(result.path)
92
+ else:
93
+ for path in iter_repo_files(root):
94
+ result = decrypt_file(path, crypto)
95
+ if result.changed:
96
+ changed_files.append(result.path)
74
97
  if changed_files:
75
98
  print(f"Decrypted {len(changed_files)} files.")
76
99
  else:
@@ -80,19 +103,21 @@ def cmd_decrypt(args: argparse.Namespace) -> int:
80
103
 
81
104
  def build_parser() -> argparse.ArgumentParser:
82
105
  parser = argparse.ArgumentParser(prog="superencrypt")
83
- parser.add_argument("--root", default=".", help="Root directory to scan")
106
+ parent = argparse.ArgumentParser(add_help=False)
107
+ parent.add_argument("--root", default=".", help="Root directory to scan")
108
+ parent.add_argument("--file", help="Scan/encrypt/decrypt a single file")
84
109
 
85
110
  subparsers = parser.add_subparsers(dest="command", required=True)
86
111
 
87
- scan_parser = subparsers.add_parser("scan", help="Scan repo for secrets")
112
+ scan_parser = subparsers.add_parser("scan", help="Scan repo for secrets", parents=[parent])
88
113
  scan_parser.set_defaults(func=cmd_scan)
89
114
 
90
- encrypt_parser = subparsers.add_parser("encrypt", help="Encrypt secrets in-place")
115
+ encrypt_parser = subparsers.add_parser("encrypt", help="Encrypt secrets in-place", parents=[parent])
91
116
  encrypt_parser.add_argument("--key", help="Base64 key string")
92
117
  encrypt_parser.add_argument("--key-file", help="Path to key file")
93
118
  encrypt_parser.set_defaults(func=cmd_encrypt)
94
119
 
95
- decrypt_parser = subparsers.add_parser("decrypt", help="Decrypt secrets in-place")
120
+ decrypt_parser = subparsers.add_parser("decrypt", help="Decrypt secrets in-place", parents=[parent])
96
121
  decrypt_parser.add_argument("--key", help="Base64 key string")
97
122
  decrypt_parser.add_argument("--key-file", help="Path to key file")
98
123
  decrypt_parser.set_defaults(func=cmd_decrypt)
@@ -0,0 +1,261 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Iterable, Iterator, List, Optional
8
+
9
+
10
# Directory names pruned from the repository walk in iter_repo_files.
SKIP_DIRS = {
    ".git",
    ".hg",
    ".svn",
    "node_modules",
    "dist",
    "build",
    ".venv",
    "venv",
    "__pycache__",
    "vendor",
    "docs",
    "doc",
}

# File names that iter_repo_files never yields (the key file itself, docs,
# and the Maven wrapper scripts).
SKIP_FILES = {
    ".superencrypt.key",
    "README.md",
    "mvnw",
    "mvnw.cmd",
}

# Names compared by exact match in _is_env_file, in addition to its
# prefix/suffix checks.
ENV_FILE_PATTERNS = (
    ".env",
    ".env.",
    ".envrc",
)

# Case-insensitive keywords that mark a key name as sensitive; matches
# anywhere inside the name (no word boundaries).
SENSITIVE_KEYWORDS = re.compile(
    r"(?i)(password|passwd|secret|token|api[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret)"
)

# Same keyword list, but anchored with \b on both sides. _is_probable_secret
# searches the context with it to pick the shorter minimum length.
# NOTE: "_" is a word character, so a name such as DB_PASSWORD does not match.
NON_SECRET_HINTS = re.compile(
    r"(?i)\b(password|passwd|secret|token|api[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret)\b"
)
45
+
46
# Shapes of values that are configuration rather than secrets. They are
# consulted by _is_probable_secret and _is_terraform_reference.

# scheme://... URLs.
URL_PATTERN = re.compile(r"(?i)^[a-z][a-z0-9+.-]*://")
# A value that is exactly one shell variable reference: $NAME or ${NAME}.
ENV_REF_PATTERN = re.compile(r"^\$(\{[^}]+\}|[A-Za-z_][A-Za-z0-9_]*)$")
BOOL_PATTERN = re.compile(r"(?i)^(true|false|yes|no|on|off)$")
# Two to five digits, e.g. a port number.
PORT_PATTERN = re.compile(r"^\d{2,5}$")
LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0"}
# Values ending in an archive / asset file extension.
FILE_LIKE_PATTERN = re.compile(r"(?i)\.(zip|tar\.gz|tgz|jar|war|css|js|map|png|jpg|jpeg|gif|svg)$")
# Dotted host names such as db.internal.example.com.
HOSTNAME_PATTERN = re.compile(r"(?i)^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9-]+)+$")
# Masked placeholders such as ****hidden****.
PLACEHOLDER_PATTERN = re.compile(r"(?i)^\*{2,}.*\*{2,}$")
# Template / interpolation markers anywhere in the value:
# ${x}, $(x), $x, {{x}}, <%x%>.
TEMPLATE_PATTERN = re.compile(r"(\$\{[^}]+\}|\$\([^)]+\)|\$[A-Za-z_][A-Za-z0-9_]*|\{\{[^}]+\}\}|<%[^%]+%>)")
# Terraform-style references (var.x, local.x, ...) appearing anywhere in the value.
REFERENCE_TOKEN_PATTERN = re.compile(
    r"(?i)\b(var|local|data|module|path|terraform|each|count)\.[A-Za-z0-9_.-]+\b"
)
# AWS ARNs. The resource part must be whitespace-free; the previous character
# class "[^\\s]" in a raw string excluded backslash and the letter "s" instead
# of whitespace, so ARNs such as ".../user/some-user" were never recognised.
ARN_PATTERN = re.compile(r"^arn:aws:[a-z0-9-]+:[a-z0-9-]*:\d{0,12}:\S+$", re.IGNORECASE)
# A value that is exactly one Terraform reference.
TERRAFORM_REF_PATTERN = re.compile(
    r"(?i)^(?:var|local|data|module|path|terraform|each|count)\.[A-Za-z0-9_.-]+$"
)
# resource_type.name style addresses.
TERRAFORM_RESOURCE_PATTERN = re.compile(r"(?i)^[a-z][a-z0-9_-]*\.[A-Za-z0-9_.-]+$")
# Function-call expressions such as file("x") or lookup(a, b).
TERRAFORM_FUNC_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*\s*\(.*\)$")
64
+
65
+
66
@dataclass
class Finding:
    """One suspected secret reported by a scan."""

    path: Path  # file in which the value was found
    line_number: int  # 1-based line number within that file
    key: Optional[str]  # env variable name, or the name of the SecretPattern that matched
    value: str  # the value text that was flagged
72
+
73
+
74
@dataclass
class SecretPattern:
    """A named regular expression used to locate a secret value within a line."""

    name: str  # identifier recorded as Finding.key when the pattern matches
    regex: re.Pattern  # compiled pattern searched against each line
    group: int  # index of the capture group that holds the secret value
79
+
80
+
81
# Patterns searched line by line in non-env files. In every entry, capture
# group 1 holds the candidate value (including surrounding quotes, if any).
SECRET_PATTERNS: List[SecretPattern] = [
    # AWS access key IDs: "AKIA" followed by 16 upper-case letters or digits.
    SecretPattern(
        name="aws_access_key_id",
        regex=re.compile(r"\b(AKIA[0-9A-Z]{16})\b"),
        group=1,
    ),
    # aws_secret_access_key assigned (":" or "=") a 40-character value.
    SecretPattern(
        name="aws_secret_access_key",
        regex=re.compile(r"(?i)aws_secret_access_key\s*[:=]\s*([A-Za-z0-9/+=]{40})"),
        group=1,
    ),
    # Dockerfile ENV/ARG instructions whose variable name contains a sensitive
    # keyword; the value may follow "=" or plain whitespace.
    SecretPattern(
        name="docker_env_or_arg",
        regex=re.compile(
            r"(?i)\b(?:ENV|ARG)\s+"
            r"(?:[A-Z0-9_]*?(?:password|passwd|secret|token|api[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret)[A-Z0-9_]*)"
            r"(?:\s*=\s*|\s+)"
            r"(\"[^\"]+\"|'[^']+'|[^\s#]+)"
        ),
        group=1,
    ),
    # Any "<sensitive keyword> : value" or "<sensitive keyword> = value"
    # assignment; the value is a quoted string or a run of characters up to
    # whitespace or "#".
    SecretPattern(
        name="generic_assignment",
        regex=re.compile(
            r"(?i)(?:password|passwd|secret|token|api[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret)\s*[:=]\s*(\"[^\"]+\"|'[^']+'|[^\s#]+)"
        ),
        group=1,
    ),
]
110
+
111
+
112
+ def _is_env_file(path: Path) -> bool:
113
+ name = path.name
114
+ if name == ".env" or name.startswith(".env.") or name.endswith(".env"):
115
+ return True
116
+ return name in ENV_FILE_PATTERNS
117
+
118
+
119
+ def _is_binary(data: bytes) -> bool:
120
+ return b"\x00" in data
121
+
122
+
123
+ def _normalize_value(value: str) -> str:
124
+ raw = value.strip()
125
+ if raw.startswith(('"', "'")) and raw.endswith(('"', "'")) and len(raw) >= 2:
126
+ return raw[1:-1]
127
+ return raw
128
+
129
+
130
def _is_probable_secret(value: str, context: str) -> bool:
    """Heuristically decide whether *value* is a real secret rather than a benign setting.

    Args:
        value: Candidate value, optionally wrapped in quotes.
        context: Text associated with the value (callers pass the key name or
            the matched assignment). When it contains a sensitive keyword as a
            whole word, the minimum accepted length drops from 12 to 6.

    Returns:
        False for empty values, templates, placeholders, local hosts, booleans,
        port-like numbers, URLs, variable references, host names, ARNs, paths,
        file-like names, and values that are too short or too uniform;
        True otherwise.
    """
    candidate = _normalize_value(value)
    if not candidate:
        return False

    # Shapes that are recognisably configuration, not credentials.
    looks_benign = (
        TEMPLATE_PATTERN.search(candidate)
        or PLACEHOLDER_PATTERN.match(candidate)
        or candidate.lower() in LOCAL_HOSTS
        or BOOL_PATTERN.fullmatch(candidate)
        or PORT_PATTERN.fullmatch(candidate)
        or URL_PATTERN.match(candidate)
        or ENV_REF_PATTERN.match(candidate)
        or REFERENCE_TOKEN_PATTERN.search(candidate)
        or HOSTNAME_PATTERN.match(candidate)
        or ARN_PATTERN.match(candidate)
        or candidate.startswith(("/", "./", "../"))
        or FILE_LIKE_PATTERN.search(candidate)
    )
    if looks_benign:
        return False

    # A keyword in the context makes shorter values plausible secrets.
    min_length = 6 if NON_SECRET_HINTS.search(context) is not None else 12
    if len(candidate) < min_length:
        return False
    # Plain lower-case words are not treated as secrets.
    if re.fullmatch(r"[a-z]+", candidate):
        return False
    # Short identifier-like strings are not treated as secrets either.
    if len(candidate) < 16 and re.fullmatch(r"[A-Za-z0-9._-]+", candidate):
        return False

    # Require at least two character classes (lower, upper, digit, other).
    char_classes = (r"[a-z]", r"[A-Z]", r"\d", r"[^A-Za-z0-9]")
    distinct = sum(1 for char_class in char_classes if re.search(char_class, candidate))
    return distinct >= 2
177
+
178
+
179
def _is_terraform_reference(value: str) -> bool:
    """Return True when *value* is a Terraform expression instead of a literal.

    After quote normalisation, the value counts as a reference when it matches
    the function-call, var/local/data/... reference, or resource-address shape.
    Empty values are never references.
    """
    candidate = _normalize_value(value)
    if not candidate:
        return False
    reference_shapes = (
        TERRAFORM_FUNC_PATTERN,
        TERRAFORM_REF_PATTERN,
        TERRAFORM_RESOURCE_PATTERN,
    )
    return any(shape.match(candidate) for shape in reference_shapes)
190
+
191
+
192
def iter_repo_files(root: Path) -> Iterator[Path]:
    """Yield the files below *root* that are eligible for scanning.

    Directories named in SKIP_DIRS are pruned from the walk and file names in
    SKIP_FILES are skipped. Files are also skipped when any path component
    below *root* is ``test``, ``tests``, ``__tests__`` or ``spec``, and when
    the file is a Gradle or Maven wrapper properties file.

    The component checks are made on the path relative to *root*. Checking the
    absolute path would also inspect the ancestors of *root*, so a repository
    checked out under a directory called ``tests`` would yield nothing.

    Args:
        root: Directory at which the walk starts.

    Yields:
        ``Path(dirpath) / filename`` for every file that passes the filters.
    """
    test_dir_names = {"test", "tests", "__tests__", "spec"}
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
        directory = Path(dirpath)
        try:
            dir_parts = set(directory.relative_to(root).parts)
        except ValueError:
            # Not expressible relative to root; fall back to the full path.
            dir_parts = set(directory.parts)
        for filename in filenames:
            if filename in SKIP_FILES:
                continue
            # The file name itself counts as a path component.
            parts = dir_parts | {filename}
            if parts & test_dir_names:
                continue
            if "gradle" in parts and "wrapper" in parts and filename == "gradle-wrapper.properties":
                continue
            if ".mvn" in parts and "wrapper" in parts and filename == "maven-wrapper.properties":
                continue
            yield directory / filename
207
+
208
+
209
def scan_env_file(path: Path) -> List[Finding]:
    """Collect probable secrets from the KEY=VALUE lines of an env file.

    Blank lines, ``#`` comments and lines without ``=`` are ignored, and a
    leading ``export `` is dropped. A finding is recorded when the key contains
    a sensitive keyword and the quote-stripped value passes
    _is_probable_secret.

    Args:
        path: Env file to read (decoded as UTF-8, undecodable bytes ignored).

    Returns:
        Findings in file order, each carrying its 1-based line number.
    """
    content = path.read_text(encoding="utf-8", errors="ignore")
    results: List[Finding] = []
    for line_number, raw_line in enumerate(content.splitlines(), start=1):
        entry = raw_line.strip()
        if entry == "" or entry.startswith("#"):
            continue
        entry = entry.removeprefix("export ")
        name, separator, raw_value = entry.partition("=")
        if not separator:
            continue
        name = name.strip()
        secret = raw_value.strip().strip('"').strip("'")
        if not SENSITIVE_KEYWORDS.search(name):
            continue
        if _is_probable_secret(secret, name):
            results.append(
                Finding(path=path, line_number=line_number, key=name, value=secret)
            )
    return results
226
+
227
+
228
def scan_file_for_patterns(path: Path) -> List[Finding]:
    """Search a non-env file line by line with every entry of SECRET_PATTERNS.

    Files containing a NUL byte are treated as binary and yield nothing. For
    each line, the first match of each pattern is considered. It is dropped
    when the file is Terraform (.tf / .tfvars) and the value is a Terraform
    reference, or when the value fails _is_probable_secret.

    Args:
        path: File to scan (decoded as UTF-8, undecodable bytes ignored).

    Returns:
        Findings in file order; ``key`` is the name of the matching pattern.
    """
    payload = path.read_bytes()
    if _is_binary(payload):
        return []
    is_terraform = path.suffix in {".tf", ".tfvars"}
    lines = payload.decode("utf-8", errors="ignore").splitlines()
    results: List[Finding] = []
    for line_number, line in enumerate(lines, start=1):
        for secret_pattern in SECRET_PATTERNS:
            hit = secret_pattern.regex.search(line)
            if not hit:
                continue
            captured = hit.group(secret_pattern.group)
            if is_terraform and _is_terraform_reference(captured):
                continue
            # The whole match serves as context for the keyword hint.
            if _is_probable_secret(captured, hit.group(0)):
                results.append(
                    Finding(
                        path=path,
                        line_number=line_number,
                        key=secret_pattern.name,
                        value=captured,
                    )
                )
    return results
246
+
247
+
248
def scan_repo(root: Path) -> List[Finding]:
    """Scan every eligible file below *root* and return all findings.

    Env files are parsed as KEY=VALUE files; every other file is searched with
    the line patterns.
    """
    collected: List[Finding] = []
    for candidate in iter_repo_files(root):
        scanner = scan_env_file if _is_env_file(candidate) else scan_file_for_patterns
        collected.extend(scanner(candidate))
    return collected
256
+
257
+
258
def scan_path(path: Path) -> List[Finding]:
    """Scan a single file, choosing the env parser or the line patterns by file name."""
    scanner = scan_env_file if _is_env_file(path) else scan_file_for_patterns
    return scanner(path)
@@ -6,7 +6,14 @@ from pathlib import Path
6
6
  from typing import Iterable, List
7
7
 
8
8
  from .crypto import Crypto, is_encrypted_value, wrap_encrypted, unwrap_encrypted
9
- from .scanner import SECRET_PATTERNS, SENSITIVE_KEYWORDS, _is_env_file, _is_binary
9
+ from .scanner import (
10
+ SECRET_PATTERNS,
11
+ SENSITIVE_KEYWORDS,
12
+ _is_env_file,
13
+ _is_binary,
14
+ _is_probable_secret,
15
+ _is_terraform_reference,
16
+ )
10
17
 
11
18
 
12
19
  @dataclass
@@ -37,6 +44,8 @@ def _encrypt_env_lines(text: str, crypto: Crypto) -> str:
37
44
  raw_value = raw_value[1:-1]
38
45
  if not SENSITIVE_KEYWORDS.search(key):
39
46
  continue
47
+ if not _is_probable_secret(raw_value, key):
48
+ continue
40
49
  if is_encrypted_value(raw_value):
41
50
  continue
42
51
  token = crypto.encrypt(raw_value).token
@@ -78,7 +87,7 @@ def _decrypt_env_lines(text: str, crypto: Crypto) -> str:
78
87
  return "\n".join(lines) + ("\n" if text.endswith("\n") else "")
79
88
 
80
89
 
81
- def _encrypt_generic(text: str, crypto: Crypto) -> str:
90
+ def _encrypt_generic(text: str, crypto: Crypto, *, path: Path | None = None) -> str:
82
91
  changed = False
83
92
 
84
93
  def replacer(match: re.Match) -> str:
@@ -87,10 +96,23 @@ def _encrypt_generic(text: str, crypto: Crypto) -> str:
87
96
  inner = pattern.regex.search(match.group(0))
88
97
  if inner:
89
98
  value = inner.group(pattern.group)
90
- if is_encrypted_value(value):
99
+ raw_value = value
100
+ quote = ""
101
+ if raw_value.startswith(('"', "'")) and raw_value.endswith(('"', "'")):
102
+ quote = raw_value[0]
103
+ raw_value = raw_value[1:-1]
104
+ if is_encrypted_value(raw_value):
105
+ return match.group(0)
106
+ if path is not None and path.suffix in {".tf", ".tfvars"}:
107
+ if _is_terraform_reference(raw_value):
108
+ return match.group(0)
109
+ if not _is_probable_secret(raw_value, match.group(0)):
91
110
  return match.group(0)
92
- token = crypto.encrypt(value).token
93
- replaced = match.group(0).replace(value, wrap_encrypted(token), 1)
111
+ token = crypto.encrypt(raw_value).token
112
+ new_value = wrap_encrypted(token)
113
+ if quote:
114
+ new_value = f"{quote}{new_value}{quote}"
115
+ replaced = match.group(0).replace(value, new_value, 1)
94
116
  changed = True
95
117
  return replaced
96
118
  return match.group(0)
@@ -121,7 +143,7 @@ def encrypt_file(path: Path, crypto: Crypto) -> TransformResult:
121
143
  if _is_env_file(path):
122
144
  new_text = _encrypt_env_lines(text, crypto)
123
145
  else:
124
- new_text = _encrypt_generic(text, crypto)
146
+ new_text = _encrypt_generic(text, crypto, path=path)
125
147
  changed = new_text != text
126
148
  if changed:
127
149
  path.write_text(new_text, encoding="utf-8")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: superencryptx
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Scan a repo for secrets and encrypt/decrypt them in-place.
5
5
  Author: Superencrypt Contributors
6
6
  License: MIT License
@@ -46,8 +46,9 @@ Requires-Dist: cryptography>=41.0.0
46
46
  Dynamic: license-file
47
47
 
48
48
  # superencrypt
49
+ https://pypi.org/project/superencryptx/0.1.0/
49
50
 
50
- CLI to scan a repo for secrets (including env files), encrypt them in-place, and decrypt them later using a key.
51
+ CLI to scan a repo for secrets (including env files, Dockerfiles, compose files, and YAML/TOML/JSON/INI-style configs), encrypt them in-place, and decrypt them later using a key.
51
52
 
52
53
  ## Why
53
54
 
@@ -92,6 +93,13 @@ superencrypt decrypt --key-file .superencrypt.key
92
93
 
93
94
  # Scan only (no changes)
94
95
  superencrypt scan
96
+
97
+ # Scan a single file
98
+ superencrypt scan --file path/to/file
99
+
100
+ # Encrypt/decrypt a single file
101
+ superencrypt encrypt --file path/to/file
102
+ superencrypt decrypt --file path/to/file --key-file .superencrypt.key
95
103
  ```
96
104
 
97
105
  ## Pipeline example
@@ -108,6 +116,7 @@ superencrypt decrypt --key "$SUPERENCRYPT_KEY"
108
116
  - Use `scan` first to review matches.
109
117
 
110
118
  ## Development
119
+ https://pypi.org/project/superencryptx/0.1.0/
111
120
 
112
121
  ```bash
113
122
  python -m venv .venv
@@ -1,133 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import re
5
- from dataclasses import dataclass
6
- from pathlib import Path
7
- from typing import Iterable, Iterator, List, Optional
8
-
9
-
10
- SKIP_DIRS = {
11
- ".git",
12
- ".hg",
13
- ".svn",
14
- "node_modules",
15
- "dist",
16
- "build",
17
- ".venv",
18
- "venv",
19
- "__pycache__",
20
- }
21
-
22
- SKIP_FILES = {
23
- ".superencrypt.key",
24
- }
25
-
26
- ENV_FILE_PATTERNS = (
27
- ".env",
28
- ".env.",
29
- ".envrc",
30
- )
31
-
32
- SENSITIVE_KEYWORDS = re.compile(
33
- r"(?i)(password|passwd|secret|token|api[_-]?key|access[_-]?key|private[_-]?key|db[_-]?user|database[_-]?user)"
34
- )
35
-
36
-
37
- @dataclass
38
- class Finding:
39
- path: Path
40
- line_number: int
41
- key: Optional[str]
42
- value: str
43
-
44
-
45
- @dataclass
46
- class SecretPattern:
47
- name: str
48
- regex: re.Pattern
49
- group: int
50
-
51
-
52
- SECRET_PATTERNS: List[SecretPattern] = [
53
- SecretPattern(
54
- name="aws_access_key_id",
55
- regex=re.compile(r"\b(AKIA[0-9A-Z]{16})\b"),
56
- group=1,
57
- ),
58
- SecretPattern(
59
- name="aws_secret_access_key",
60
- regex=re.compile(r"(?i)aws_secret_access_key\s*[:=]\s*([A-Za-z0-9/+=]{40})"),
61
- group=1,
62
- ),
63
- SecretPattern(
64
- name="generic_assignment",
65
- regex=re.compile(r"(?i)(password|passwd|secret|token|api[_-]?key)\s*[:=]\s*([\w\-./+=:@]+)"),
66
- group=2,
67
- ),
68
- ]
69
-
70
-
71
- def _is_env_file(path: Path) -> bool:
72
- name = path.name
73
- if name == ".env" or name.startswith(".env.") or name.endswith(".env"):
74
- return True
75
- return name in ENV_FILE_PATTERNS
76
-
77
-
78
- def _is_binary(data: bytes) -> bool:
79
- return b"\x00" in data
80
-
81
-
82
- def iter_repo_files(root: Path) -> Iterator[Path]:
83
- for dirpath, dirnames, filenames in os.walk(root):
84
- dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
85
- for filename in filenames:
86
- if filename in SKIP_FILES:
87
- continue
88
- yield Path(dirpath) / filename
89
-
90
-
91
- def scan_env_file(path: Path) -> List[Finding]:
92
- findings: List[Finding] = []
93
- text = path.read_text(encoding="utf-8", errors="ignore")
94
- for idx, line in enumerate(text.splitlines(), start=1):
95
- stripped = line.strip()
96
- if not stripped or stripped.startswith("#"):
97
- continue
98
- if stripped.startswith("export "):
99
- stripped = stripped[len("export ") :]
100
- if "=" not in stripped:
101
- continue
102
- key, value = stripped.split("=", 1)
103
- key = key.strip()
104
- value = value.strip().strip('"').strip("'")
105
- if SENSITIVE_KEYWORDS.search(key):
106
- findings.append(Finding(path=path, line_number=idx, key=key, value=value))
107
- return findings
108
-
109
-
110
- def scan_file_for_patterns(path: Path) -> List[Finding]:
111
- data = path.read_bytes()
112
- if _is_binary(data):
113
- return []
114
- text = data.decode("utf-8", errors="ignore")
115
- findings: List[Finding] = []
116
- for idx, line in enumerate(text.splitlines(), start=1):
117
- for pattern in SECRET_PATTERNS:
118
- match = pattern.regex.search(line)
119
- if not match:
120
- continue
121
- value = match.group(pattern.group)
122
- findings.append(Finding(path=path, line_number=idx, key=pattern.name, value=value))
123
- return findings
124
-
125
-
126
- def scan_repo(root: Path) -> List[Finding]:
127
- findings: List[Finding] = []
128
- for path in iter_repo_files(root):
129
- if _is_env_file(path):
130
- findings.extend(scan_env_file(path))
131
- continue
132
- findings.extend(scan_file_for_patterns(path))
133
- return findings
File without changes
File without changes