vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,704 @@
1
+ """Config safety gate: dangerous_default, missing_env_var_check, hardcoded_path.
2
+
3
+ config.dangerous_default:
4
+ Dataclass field or function parameter default with a security-risky value:
5
+ allow_unsafe=True, verify_ssl=False, check_certs=False, debug=True,
6
+ trust_env=True, unsafe_*=True.
7
+
8
+ config.missing_env_var_check:
9
+ os.environ["VAR"] subscript access without surrounding try/except KeyError
10
+ or a prior `if "VAR" in os.environ:` guard in the same function body.
11
+
12
+ config.unguarded_env_access.hardcoded_path (Sprint F-4):
13
+ String constants embedded as launch/runtime dependencies that pin the code
14
+ to a specific machine's filesystem layout — the natural extension of the
15
+ unguarded_env_access concern. These show up most often as:
16
+ * /usr/bin/pythonN — hardcoded interpreter path in subprocess argv
17
+ * C:\\Python311\\python.exe — same, Windows variant
18
+ * C:\\Users\\<user>\\... or /home/<user>/... — hardcoded home dir
19
+ * .venv/bin/<exe> or venv/Scripts/<exe> — pinned virtualenv layout
20
+ Detection is AST-based; the constant must appear as the first element of
21
+ subprocess argv, the path argument of os.execv/os.execve, the argument
22
+ to pathlib.Path(), or the path argument of os.spawnv/os.spawnve (assignments such as os.environ["PATH"] = ... are not inspected).
23
+ Severity adjusts by deployment_target context (linux-only project +
24
+ windows-only path = HIGH; cross-platform = MEDIUM).
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import ast
29
+ import logging
30
+ import re
31
+ from pathlib import Path
32
+
33
+ from vigil_forensic._shared import (
34
+ EvidenceReference,
35
+ GateCategory,
36
+ GateImpact,
37
+ GateSeverity,
38
+ RepairKind,
39
+ )
40
+ from vigil_forensic.gate_models import PostExecGateContext
41
+ from ..source_analysis import is_source_file
42
+ from ._deployment_detector import resolve_deployment
43
+ from .common import build_check_result, build_finding, has_allowlist_for, normalize_path
44
+
45
+ _log = logging.getLogger(__name__)
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Dangerous defaults: (param_name_pattern, dangerous_value)
49
+ # dangerous_value=True means the flag True is risky; False means False is risky
50
+ # ---------------------------------------------------------------------------
51
_DANGEROUS_DEFAULTS: list[tuple[re.Pattern[str], bool]] = [
    (re.compile(r"^allow_unsafe$"), True),
    (re.compile(r"^verify_ssl$"), False),
    (re.compile(r"^check_certs$"), False),
    (re.compile(r"^debug$"), True),
    (re.compile(r"^trust_env$"), True),
    (re.compile(r"^unsafe_"), True),  # any name starting with unsafe_
]


def _is_dangerous_constant(name: str, value: object) -> bool:
    """Return True if name+value matches a dangerous-default pattern.

    ``value`` is compared by identity against the risky boolean, so only the
    ``True`` / ``False`` singletons can match (``1`` or ``0`` never do).
    """
    return any(
        pattern.match(name) and value is risky_value
        for pattern, risky_value in _DANGEROUS_DEFAULTS
    )


# ---------------------------------------------------------------------------
# Helpers — dangerous_default
# ---------------------------------------------------------------------------

def _find_dangerous_defaults(src: str, file_path: str) -> list[dict]:
    """Collect boolean defaults in *src* that match ``_DANGEROUS_DEFAULTS``.

    Two places are inspected:

    * function / async-function parameter defaults (positional-only,
      positional-or-keyword and keyword-only parameters);
    * direct ``name = <bool>`` and ``name: T = <bool>`` statements in a class
      body.

    Args:
        src: Python source text to analyse.
        file_path: Path recorded verbatim in each hit's ``"file"`` entry.

    Returns:
        A list of dicts with keys ``kind`` (``"func_param"`` or
        ``"class_field"``), ``name``, ``value``, ``line`` and ``file``.
        Returns an empty list when *src* does not parse.
    """
    try:
        tree = ast.parse(src)
    except SyntaxError:
        return []

    hits: list[dict] = []

    def record(kind: str, name: str, default: ast.AST, line: int) -> None:
        # Only literal True/False defaults are considered.
        if (
            isinstance(default, ast.Constant)
            and isinstance(default.value, bool)
            and _is_dangerous_constant(name, default.value)
        ):
            hits.append({
                "kind": kind,
                "name": name,
                "value": default.value,
                "line": line,
                "file": file_path,
            })

    for node in ast.walk(tree):
        # Function parameter defaults
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            spec = node.args
            # ``defaults`` is right-aligned over positional-only parameters
            # followed by regular positional parameters, so both lists must be
            # joined before pairing names with defaults.
            positional = [*spec.posonlyargs, *spec.args]
            defaulted = positional[len(positional) - len(spec.defaults):]
            pairs = [
                *zip(defaulted, spec.defaults),
                # ``kw_defaults`` is parallel to ``kwonlyargs``; None marks a
                # keyword-only parameter without a default.
                *zip(spec.kwonlyargs, spec.kw_defaults),
            ]
            for arg, default in pairs:
                if default is None:
                    continue
                line = getattr(default, "lineno", getattr(node, "lineno", 0))
                record("func_param", arg.arg, default, line)

        # Dataclass / class body assignments (e.g. field defaults)
        if isinstance(node, ast.ClassDef):
            for stmt in node.body:
                if isinstance(stmt, ast.AnnAssign) and stmt.value is not None:
                    if isinstance(stmt.target, ast.Name):
                        record(
                            "class_field",
                            stmt.target.id,
                            stmt.value,
                            getattr(stmt, "lineno", 0),
                        )
                elif isinstance(stmt, ast.Assign):
                    for target in stmt.targets:
                        if isinstance(target, ast.Name):
                            record(
                                "class_field",
                                target.id,
                                stmt.value,
                                getattr(stmt, "lineno", 0),
                            )

    return hits
144
+
145
+
146
+ # ---------------------------------------------------------------------------
147
+ # Helpers — missing_env_var_check
148
+ # ---------------------------------------------------------------------------
149
+
150
def _extract_environ_subscript_accesses(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> list[dict]:
    """Collect os.environ["VAR"] subscript reads/deletes in a function body.

    Only the literal spelling ``os.environ["<string constant>"]`` is matched.
    Plain assignments (``os.environ["VAR"] = value``) are ignored because
    storing a key cannot raise ``KeyError``; augmented assignments
    (``os.environ["VAR"] += ...``) and ``del`` statements are kept because
    they need the key to exist.

    Args:
        func_node: Function definition whose whole subtree is walked
            (nested definitions included).

    Returns:
        One ``{"varname": str, "line": int}`` dict per matching access.
    """
    # ast.Index is documented as deprecated and scheduled for removal; look it
    # up defensively so the check keeps working once it is gone.
    legacy_index = getattr(ast, "Index", None)

    # An AugAssign target carries Store context although the value is read
    # first, so those targets must not be filtered out with plain stores.
    augmented_targets = {
        id(stmt.target)
        for stmt in ast.walk(func_node)
        if isinstance(stmt, ast.AugAssign)
    }

    results: list[dict] = []
    for node in ast.walk(func_node):
        if not isinstance(node, ast.Subscript):
            continue
        if isinstance(getattr(node, "ctx", None), ast.Store) and id(node) not in augmented_targets:
            continue
        owner = node.value
        if not (isinstance(owner, ast.Attribute) and owner.attr == "environ"):
            continue
        if not (isinstance(owner.value, ast.Name) and owner.value.id == "os"):
            continue
        # Python 3.9+: slice is the key node itself; 3.8: wrapped in Index
        key_node = node.slice
        if legacy_index is not None and isinstance(key_node, legacy_index):
            key_node = key_node.value  # type: ignore[attr-defined]
        if isinstance(key_node, ast.Constant) and isinstance(key_node.value, str):
            results.append({
                "varname": key_node.value,
                "line": getattr(node, "lineno", 0),
            })
    return results
175
+
176
+
177
# Exception names whose handlers also catch KeyError (KeyError itself and its
# base classes).
_KEY_ERROR_CATCHERS = frozenset({"KeyError", "LookupError", "Exception", "BaseException"})


def _handler_catches_key_error(type_node: ast.AST | None) -> bool:
    """Return True if an ``except`` clause with this type expression catches KeyError."""
    if type_node is None:
        return True  # bare except
    if isinstance(type_node, ast.Tuple):
        return any(_handler_catches_key_error(elt) for elt in type_node.elts)
    if isinstance(type_node, ast.Name):
        return type_node.id in _KEY_ERROR_CATCHERS
    if isinstance(type_node, ast.Attribute):
        return type_node.attr in _KEY_ERROR_CATCHERS
    return False


def _has_environ_guard(func_node: ast.FunctionDef | ast.AsyncFunctionDef, varname: str) -> bool:
    """Return True if the function body contains a guard for the given env var name.

    Guards recognised (anywhere in the function's subtree; the guard is not
    required to enclose the access itself):
      1. A comparison whose first operator is ``in`` / ``not in`` and whose
         left operand is the constant *varname*, e.g. ``"VAR" in os.environ``.
         The right-hand side is not inspected.
      2. Any ``try`` statement with a handler that catches ``KeyError``: a
         bare ``except``, ``KeyError`` or one of its base classes, or a tuple
         containing one of those.
    """
    for node in ast.walk(func_node):
        # Pattern 1: "VAR" in os.environ OR "VAR" not in os.environ
        if isinstance(node, ast.Compare):
            if node.ops and isinstance(node.ops[0], (ast.In, ast.NotIn)):
                left = node.left
                if isinstance(left, ast.Constant) and left.value == varname:
                    return True
        # Pattern 2: try/except that would catch KeyError
        if isinstance(node, ast.Try):
            if any(_handler_catches_key_error(handler.type) for handler in node.handlers):
                return True
    return False
201
+
202
+
203
def _find_missing_env_var_checks(src: str, file_path: str) -> list[dict]:
    """Find ``os.environ["VAR"]`` accesses that have no recognised guard.

    Function bodies are checked with ``_extract_environ_subscript_accesses``
    and ``_has_environ_guard``. Module-level statements are checked
    separately: top-level ``if "VAR" in os.environ`` tests and top-level
    ``try`` blocks with a bare or ``KeyError`` handler mark variables as
    guarded, and top-level ``def`` / ``class`` / ``if`` / ``try`` statements
    are not scanned for unguarded accesses.

    Args:
        src: Python source text to analyse.
        file_path: Path recorded verbatim in each hit's ``"file"`` entry.

    Returns:
        A list of ``{"varname", "line", "file"}`` dicts. Empty when *src*
        does not parse.
    """
    try:
        tree = ast.parse(src)
    except SyntaxError:
        return []

    # ast.Index is documented as deprecated and scheduled for removal.
    legacy_index = getattr(ast, "Index", None)

    def environ_key(node: ast.AST):
        """Return KEY for an ``os.environ["KEY"]`` subscript node, else None."""
        if not isinstance(node, ast.Subscript):
            return None
        owner = node.value
        if not (isinstance(owner, ast.Attribute) and owner.attr == "environ"):
            return None
        if not (isinstance(owner.value, ast.Name) and owner.value.id == "os"):
            return None
        key_node = node.slice
        if legacy_index is not None and isinstance(key_node, legacy_index):
            key_node = key_node.value  # type: ignore[attr-defined]
        if isinstance(key_node, ast.Constant) and isinstance(key_node.value, str):
            return key_node.value
        return None

    hits: list[dict] = []

    # ast.walk yields an enclosing function before the functions nested in it
    # and each walk covers nested bodies, so the same access can be seen once
    # per enclosing def. Remember what was reported to avoid duplicates.
    reported: set[tuple[str, int]] = set()
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for access in _extract_environ_subscript_accesses(node):
            varname = access["varname"]
            marker = (varname, access["line"])
            if marker in reported or _has_environ_guard(node, varname):
                continue
            reported.add(marker)
            hits.append({
                "varname": varname,
                "line": access["line"],
                "file": file_path,
            })

    # Module-level accesses (outside any function). Only the direct children
    # of Module.body are considered top-level statements.
    module_guarded_vars: set[str] = set()
    for stmt in tree.body:
        # Guard form 1: `if "VAR" in os.environ:` / `if "VAR" not in ...:`
        if isinstance(stmt, ast.If):
            test = stmt.test
            if (
                isinstance(test, ast.Compare)
                and test.ops
                and isinstance(test.ops[0], (ast.In, ast.NotIn))
                and isinstance(test.left, ast.Constant)
                and isinstance(test.left.value, str)
            ):
                module_guarded_vars.add(test.left.value)
        # Guard form 2: try with a bare or KeyError handler. Every variable
        # accessed anywhere inside that try statement is treated as guarded.
        if isinstance(stmt, ast.Try):
            catches_key_error = any(
                handler.type is None
                or (isinstance(handler.type, ast.Name) and handler.type.id == "KeyError")
                for handler in stmt.handlers
            )
            if catches_key_error:
                for sub in ast.walk(stmt):
                    key = environ_key(sub)
                    if key is not None:
                        module_guarded_vars.add(key)

    # Unguarded subscript accesses in the remaining top-level statements.
    for stmt in tree.body:
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.If, ast.Try)):
            continue  # skip function defs (already handled) and guarded blocks
        # AugAssign targets have Store context but still read the key first.
        augmented_targets = {
            id(inner.target)
            for inner in ast.walk(stmt)
            if isinstance(inner, ast.AugAssign)
        }
        for sub in ast.walk(stmt):
            varname = environ_key(sub)
            if varname is None or varname in module_guarded_vars:
                continue
            # A plain `os.environ["VAR"] = value` cannot raise KeyError.
            if isinstance(getattr(sub, "ctx", None), ast.Store) and id(sub) not in augmented_targets:
                continue
            hits.append({
                "varname": varname,
                "line": getattr(sub, "lineno", 0),
                "file": file_path,
            })

    return hits
292
+
293
+
294
+ # ---------------------------------------------------------------------------
295
+ # Helpers — hardcoded_path (Sprint F-4)
296
+ # ---------------------------------------------------------------------------
297
+ #
298
+ # Hardcoded interpreter / user / venv paths embedded as launch dependencies.
299
+ # These are the second face of the "unguarded environment access" concern:
300
+ # the code reaches outside its sandbox without going through a configurable
301
+ # channel (env var, sys.executable, expanduser, importlib.resources). When
302
+ # the host moves, the code breaks.
303
+ #
304
+ # Patterns are anchored — partial substring matches only fire on real path
305
+ # shapes ("/usr/bin/python3", not the unrelated string "python3" in a docstring).
306
+ # The variant categorisation is exposed in the finding so reviewers can
307
+ # distinguish a Windows-pinned path on a Linux deployment (HIGH severity)
308
+ # from a portable cross-platform pin (MEDIUM).
309
+
310
_HARDCODED_PATH_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(regex, flags), variant)
    for regex, flags, variant in (
        # POSIX interpreter: /usr/bin/python, /usr/bin/python3,
        # /usr/bin/python3.11, /usr/local/bin/python
        (r"^/usr(?:/local)?/bin/python\d*(?:\.\d+)?$", 0, "hardcoded_interpreter"),
        # Windows interpreter: C:\Python311\python.exe, D:\Python39\python.exe
        (r"^[A-Za-z]:[\\\/]+Python\d+[\\\/]+python\.exe$", re.IGNORECASE, "hardcoded_interpreter"),
        # Windows user directory: <drive>:\Users\<name>\ followed by anything.
        (r"^[A-Za-z]:[\\\/]+Users[\\\/]+[A-Za-z0-9._-]+[\\\/]+", re.IGNORECASE, "hardcoded_user_path"),
        # POSIX user directory: /home/<name>/ followed by anything.
        (r"^/home/[A-Za-z0-9._-]+/", 0, "hardcoded_user_path"),
        # macOS user directory: /Users/<name>/ followed by anything.
        (r"^/Users/[A-Za-z0-9._-]+/", 0, "hardcoded_user_path"),
        # Virtualenv executable, Unix layout: [.]venv/bin/<exe>
        (r"^[\./]*\.?venv/bin/[A-Za-z0-9._-]+$", 0, "hardcoded_venv_path"),
        # Virtualenv executable, Windows layout: [.]venv\Scripts\<exe>[.exe]
        (r"^[\./]*\.?venv[\\\/]+Scripts[\\\/]+[A-Za-z0-9._-]+(?:\.exe)?$", re.IGNORECASE, "hardcoded_venv_path"),
    )
)


def _classify_hardcoded_path(value: str) -> str | None:
    """Map *value* to the variant tag of the first pattern it matches.

    The string is tested as-is (no separator normalisation) against
    ``_HARDCODED_PATH_PATTERNS`` in declaration order. Strings that are empty
    or shorter than five characters, and strings matching no pattern, yield
    None. Every pattern is anchored at the start, so bare words such as
    ``"python"`` or ``"home"`` never match.
    """
    if not value or len(value) < 5:
        return None
    return next(
        (tag for regex, tag in _HARDCODED_PATH_PATTERNS if regex.match(value)),
        None,
    )
349
+
350
+
351
+ # AST call-site signatures we treat as "this string IS a launch/runtime path".
352
+ # A hardcoded path appearing inside an unrelated string-formatting context
353
+ # (e.g. a docstring describing how to install) is not a launch dependency.
354
+ #
355
+ # Format: ((module_or_class, leaf_attr_or_None), arg_index_or_keyword)
356
+ # * (("subprocess", "run"), 0) — first positional argument of subprocess.run
357
+ # * (("subprocess", "Popen"), 0)
358
+ # * (("os", "execv"), 0) — first positional argument of os.execv
359
+ # * (("os", "execve"), 0)
360
+ # * ((None, "Path"), 0) — Path(...) bare call (after `from pathlib import Path`)
361
+ # * (("pathlib", "Path"), 0)
362
+ #
363
+ # When the matching argument is a list/tuple literal (subprocess argv), we
364
+ # inspect its first element rather than the literal itself.
365
+
366
# ((module_or_None, callable_name), positional index of the path/argv slot)
_LAUNCH_CALL_SIGNATURES: tuple[tuple[tuple[str | None, str], int], ...] = (
    # subprocess launchers: argv (or command) is the first positional argument.
    *(
        (("subprocess", func), 0)
        for func in ("run", "Popen", "call", "check_call", "check_output")
    ),
    # os.exec* family: executable path/file is the first positional argument.
    *((("os", func), 0) for func in ("execv", "execve", "execvp", "execvpe")),
    # os.spawnv(mode, path, args) / os.spawnve: path is the second argument.
    *((("os", func), 1) for func in ("spawnv", "spawnve")),
    # Path(...) called bare or as pathlib.Path(...).
    ((None, "Path"), 0),
    (("pathlib", "Path"), 0),
)
381
+
382
+
383
def _resolve_simple_call_target(call: ast.Call) -> tuple[str | None, str] | None:
    """Describe the callee of *call* as a ``(qualifier, name)`` pair.

    * ``func(...)``      -> ``(None, "func")``
    * ``name.attr(...)`` -> ``("name", "attr")``

    Any other callee shape (deeper attribute chains such as ``a.b.c(...)``,
    calls on call results, subscripts, ...) yields None.
    """
    callee = call.func
    if isinstance(callee, ast.Name):
        return (None, callee.id)
    if isinstance(callee, ast.Attribute) and isinstance(callee.value, ast.Name):
        return (callee.value.id, callee.attr)
    return None
396
+
397
+
398
def _argv_first_arg(call: ast.Call, arg_index: int) -> ast.AST | None:
    """Pick the node that names the launched path in a positional slot.

    Looks at ``call.args[arg_index]``. When that argument is a non-empty
    list or tuple literal, its first element is returned instead, so
    ``subprocess.run([X, ...])`` yields ``X``. Keyword arguments are not
    consulted.

    Returns None when the call has no positional argument at *arg_index*.
    """
    positional = call.args
    if not arg_index < len(positional):
        return None
    slot = positional[arg_index]
    is_nonempty_sequence = isinstance(slot, (ast.List, ast.Tuple)) and bool(slot.elts)
    return slot.elts[0] if is_nonempty_sequence else slot
410
+
411
+
412
def _is_safe_dynamic_path_call(node: ast.AST) -> bool:
    """Recognise common safe alternatives that produce a path at runtime.

    These must NOT trigger a hardcoded-path finding even when they sit in a
    Path(...) / subprocess argv slot. Recognised shapes:

      * ``sys.executable``
      * ``<name>.parent`` / ``.parents`` / ``.stem`` / ``.name``
      * the bare names ``__file__`` and ``__path__``
      * calls whose dotted callee ends in one of:
        ``os.path.expanduser`` / ``os.path.expandvars`` (also spelled
        ``path.expanduser`` or ``os.expanduser``), ``shutil.which``,
        ``Path.home`` / ``Path.cwd`` (with or without a ``pathlib.`` prefix),
        ``environ.get`` (with or without an ``os.`` prefix),
        ``resources.files`` (e.g. ``importlib.resources.files``)

    The callee is resolved through the full attribute chain, so
    ``os.path.expanduser(...)`` and ``os.environ.get(...)`` are matched; a
    one-level ``name.attr`` lookup cannot see them. Only chains rooted at a
    plain name are considered; method calls on call results (for example
    ``Path(x).expanduser()``) are not recognised.
    """
    if isinstance(node, ast.Name):
        return node.id in {"__file__", "__path__"}

    if isinstance(node, ast.Attribute):
        if not isinstance(node.value, ast.Name):
            return False
        if node.value.id == "sys" and node.attr == "executable":
            return True
        # Attributes commonly used to derive a path from another name.
        return node.attr in {"parent", "parents", "stem", "name"}

    if not isinstance(node, ast.Call):
        return False

    # Flatten the callee into its dotted parts, innermost name first.
    parts: list[str] = []
    callee = node.func
    while isinstance(callee, ast.Attribute):
        parts.append(callee.attr)
        callee = callee.value
    if not isinstance(callee, ast.Name):
        return False
    parts.append(callee.id)
    if len(parts) < 2:
        return False  # bare function call: no qualifier to recognise

    # parts is reversed: [leaf, owner, ...]
    leaf, owner = parts[0], parts[1]
    safe_calls = {
        ("os", "expanduser"),
        ("os", "expandvars"),
        ("path", "expanduser"),
        ("path", "expandvars"),
        ("shutil", "which"),
        ("Path", "home"),
        ("Path", "cwd"),
        ("environ", "get"),
        ("resources", "files"),
    }
    return (owner, leaf) in safe_calls
454
+
455
+
456
def _find_hardcoded_paths(
    src: str,
    file_path: str,
    *,
    project_dir: Path,
) -> list[dict]:
    """Report hardcoded interpreter / user / venv path literals passed to
    the call sites listed in ``_LAUNCH_CALL_SIGNATURES``.

    Args:
        src: Python source text to analyse.
        file_path: Path recorded verbatim in each hit's ``"file"`` entry.
        project_dir: Passed to ``resolve_deployment`` together with *src* to
            obtain the deployment target used for severity.

    Returns:
        A list of dicts with keys ``value``, ``variant``, ``line``, ``file``,
        ``severity`` and ``deployment``. Empty when *src* does not parse.
        Severity is MEDIUM, raised to HIGH when a drive-letter path is found
        under a ``"linux-only"`` deployment or a ``/usr/``, ``/home/`` or
        ``/Users/`` path under a ``"windows-only"`` deployment.
    """
    try:
        module = ast.parse(src)
    except SyntaxError:
        return []

    # The deployment target is resolved once per file; a failing detector
    # degrades to "unknown" instead of aborting the gate.
    try:
        deployment = resolve_deployment(project_dir, file_content=src)
    except Exception:  # noqa: BLE001 -- detector must never crash gate
        deployment = "unknown"

    slot_by_signature = dict(_LAUNCH_CALL_SIGNATURES)
    found: list[dict] = []

    for call in (n for n in ast.walk(module) if isinstance(n, ast.Call)):
        target = _resolve_simple_call_target(call)
        if target is None or target not in slot_by_signature:
            continue

        candidate = _argv_first_arg(call, slot_by_signature[target])
        if candidate is None or _is_safe_dynamic_path_call(candidate):
            continue

        # Only string literals can be classified as hardcoded paths.
        if not isinstance(candidate, ast.Constant) or not isinstance(candidate.value, str):
            continue
        text = candidate.value

        variant = _classify_hardcoded_path(text)
        if variant is None:
            continue

        windows_shaped = re.match(r"^[A-Za-z]:[\\\/]", text) is not None
        posix_shaped = text.startswith(("/usr/", "/home/", "/Users/"))
        platform_conflict = (deployment == "linux-only" and windows_shaped) or (
            deployment == "windows-only" and posix_shaped
        )

        found.append({
            "value": text,
            "variant": variant,
            "line": getattr(candidate, "lineno", getattr(call, "lineno", 0)),
            "file": file_path,
            "severity": GateSeverity.HIGH if platform_conflict else GateSeverity.MEDIUM,
            "deployment": deployment,
        })

    return found
531
+
532
+
533
+ # ---------------------------------------------------------------------------
534
+ # Gate entry-point
535
+ # ---------------------------------------------------------------------------
536
+
537
def _file_line_evidence(normalized: str, lineno: int) -> list:
    """Build the single-entry evidence list pointing at ``normalized:lineno``."""
    return [
        EvidenceReference(
            kind="file",
            path=normalized,
            detail=f"line:{lineno}",
        )
    ]


def _dangerous_default_finding(hit: dict, normalized: str):
    """Turn one ``_find_dangerous_defaults`` hit into a gate finding."""
    name = hit["name"]
    value = hit["value"]
    lineno = hit["line"]
    safe_value = not value  # flip the bool to the safe direction
    return build_finding(
        check_id="config.dangerous_default",
        category=GateCategory.CONFIG_SSOT,
        title=f"Dangerous default: {name}={value!r} at {normalized}:{lineno}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"{normalized} line {lineno}: parameter/field '{name}' defaults to "
            f"{value!r} which is a security-unsafe value. "
            "This opt-in to unsafe behaviour is invisible to callers who rely on defaults."
        ),
        recommendation=(
            f"Change default to {name}={safe_value!r} (the safe value). "
            "Require explicit opt-in via keyword argument for unsafe behaviour."
        ),
        evidence=_file_line_evidence(normalized, lineno),
        repair_kind=RepairKind.FIX_CONTRACT.value,
        executor_action=(
            f"Flip default to safe value; require explicit opt-in via kwarg. "
            f"Change {name}={value!r} → {name}={safe_value!r}"
        ),
        proof_required=(
            f"No {name}={value!r} default in codebase; unsafe behaviour requires explicit kwarg"
        ),
        allowlist_allowed=True,
    )


def _missing_env_var_finding(hit: dict, normalized: str):
    """Turn one ``_find_missing_env_var_checks`` hit into a gate finding."""
    varname = hit["varname"]
    lineno = hit["line"]
    return build_finding(
        check_id="config.missing_env_var_check",
        category=GateCategory.CONFIG_SSOT,
        title=f"Unguarded os.environ[\"{varname}\"] at {normalized}:{lineno}",
        severity=GateSeverity.MEDIUM,
        impact=GateImpact.REVISE,
        summary=(
            f"{normalized} line {lineno}: os.environ[\"{varname}\"] raises KeyError "
            "if the variable is absent. No try/except KeyError or "
            f"'if \"{varname}\" in os.environ:' guard is present in the enclosing function."
        ),
        recommendation=(
            f"Replace os.environ[\"{varname}\"] with "
            f"os.environ.get(\"{varname}\", DEFAULT) "
            f"or add an explicit presence check: "
            f"if \"{varname}\" not in os.environ: raise RuntimeError(...)."
        ),
        evidence=_file_line_evidence(normalized, lineno),
        repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
        executor_action=(
            f"Replace os.environ[\"{varname}\"] with "
            f"os.environ.get(\"{varname}\", DEFAULT) or explicit missing check"
        ),
        proof_required=(
            f"No unguarded os.environ[\"{varname}\"] subscript access remains"
        ),
        allowlist_allowed=True,
    )


def _hardcoded_path_finding(hit: dict, normalized: str):
    """Turn one ``_find_hardcoded_paths`` hit into a gate finding."""
    value = hit["value"]
    variant = hit["variant"]
    lineno = hit["line"]
    deployment = hit["deployment"]
    return build_finding(
        check_id="config.unguarded_env_access.hardcoded_path",
        category=GateCategory.CONFIG_SSOT,
        title=(
            f"Hardcoded {variant.replace('_', ' ')}: {value!r} "
            f"at {normalized}:{lineno}"
        ),
        severity=hit["severity"],
        impact=GateImpact.REVISE,
        summary=(
            f"{normalized} line {lineno}: launch/runtime path is the "
            f"hardcoded literal {value!r} (variant: {variant}). "
            "This pins execution to a specific machine's filesystem "
            "layout; the code breaks on any host that does not "
            f"match. Detected deployment_target: {deployment}."
        ),
        recommendation=(
            "Replace with a portable alternative: "
            "sys.executable for Python interpreter, "
            "os.path.expanduser('~/...') or pathlib.Path.home() for "
            "user paths, importlib.resources for bundled assets, "
            "or os.environ.get('VAR') for site-specific values."
        ),
        evidence=_file_line_evidence(normalized, lineno),
        repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
        executor_action=(
            f"Replace literal {value!r} with portable equivalent "
            f"(sys.executable / os.path.expanduser / Path.home / "
            "importlib.resources / os.environ)."
        ),
        proof_required=(
            "grep shows no remaining hardcoded interpreter / user / "
            "venv path literals at file launch boundary"
        ),
        allowlist_allowed=True,
        confidence=0.85,
        applicability="applicable",
        analysis_mode="ast",
        applicability_reason=f"deployment_target={deployment}",
    )


def run_config_safety_checks(ctx: PostExecGateContext):
    """Run the config-safety detectors over the changed source files.

    For every path in ``ctx.changed_files_observed`` that ``is_source_file``
    accepts, the file is read as UTF-8 from ``ctx.project_dir`` (unreadable
    or undecodable files are logged at debug level and skipped) and three
    detectors run in order: dangerous defaults, unguarded ``os.environ[...]``
    access, and hardcoded launch paths. Hits suppressed by
    ``has_allowlist_for`` are dropped; for hardcoded paths both the full
    check id and its parent id ``config.unguarded_env_access`` suppress.

    Returns:
        The value of ``build_check_result`` for check id ``config_safety``
        carrying all collected findings.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        try:
            src = (ctx.project_dir / normalized).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("config_safety_checks: cannot read %s: %s", normalized, exc)
            continue

        # --- config.dangerous_default ---
        for hit in _find_dangerous_defaults(src, normalized):
            if not has_allowlist_for(src, "config.dangerous_default", hit["line"]):
                findings.append(_dangerous_default_finding(hit, normalized))

        # --- config.missing_env_var_check ---
        for hit in _find_missing_env_var_checks(src, normalized):
            if not has_allowlist_for(src, "config.missing_env_var_check", hit["line"]):
                findings.append(_missing_env_var_finding(hit, normalized))

        # --- config.unguarded_env_access.hardcoded_path (Sprint F-4) ---
        for hit in _find_hardcoded_paths(src, normalized, project_dir=ctx.project_dir):
            lineno = hit["line"]
            # The parent id allowlist also suppresses the variant.
            suppressed = has_allowlist_for(
                src, "config.unguarded_env_access.hardcoded_path", lineno
            ) or has_allowlist_for(src, "config.unguarded_env_access", lineno)
            if not suppressed:
                findings.append(_hardcoded_path_finding(hit, normalized))

    return build_check_result(
        check_id="config_safety",
        category=GateCategory.CONFIG_SSOT,
        findings=findings,
    )