vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Security gate: command_injection and path_traversal_hint detection.
|
|
2
|
+
|
|
3
|
+
security.command_injection:
|
|
4
|
+
subprocess.* called with shell=True AND first positional arg is a
|
|
5
|
+
string-building expression (BinOp Add, JoinedStr f-string, or .format()).
|
|
6
|
+
|
|
7
|
+
security.path_traversal_hint:
|
|
8
|
+
os.path.join(base, user_input) or Path(base) / user_input where user_input
|
|
9
|
+
is a function parameter and no sanitizer (.resolve() + is_relative_to or
|
|
10
|
+
explicit ".." check) is visible in the function body.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import ast
|
|
15
|
+
import logging
|
|
16
|
+
import re
|
|
17
|
+
|
|
18
|
+
from vigil_forensic._shared import (
|
|
19
|
+
EvidenceReference,
|
|
20
|
+
GateCategory,
|
|
21
|
+
GateImpact,
|
|
22
|
+
GateSeverity,
|
|
23
|
+
RepairKind,
|
|
24
|
+
)
|
|
25
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
26
|
+
from ..source_analysis import is_source_file
|
|
27
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
28
|
+
from ._ast_helpers import parse_python_source_or_emit_finding
|
|
29
|
+
|
|
30
|
+
_log = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# subprocess function names considered dangerous with shell=True
|
|
33
|
+
_SUBPROCESS_FUNCS = frozenset({"run", "Popen", "call", "check_call", "check_output"})
|
|
34
|
+
|
|
35
|
+
# Taint hint: parameter names that suggest user-supplied / external input.
|
|
36
|
+
# Only path parameters whose names match this regex (no `# taint:` comment annotation is read by this module)
|
|
37
|
+
# are flagged as path traversal hints.
|
|
38
|
+
_TAINT_PARAM_RE = re.compile(
|
|
39
|
+
r"(?i)^(user_|untrusted_|input_|request_|body_|form_|query_|param_|upload_|client_|external_)"
|
|
40
|
+
r"|(_input|_upload|_param|_arg|_from_user)$"
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Helpers — command_injection
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
def _is_shell_true(call: ast.Call) -> bool:
|
|
49
|
+
for kw in call.keywords:
|
|
50
|
+
if kw.arg == "shell" and isinstance(kw.value, ast.Constant) and kw.value.value is True:
|
|
51
|
+
return True
|
|
52
|
+
return False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _is_string_building(node: ast.expr) -> bool:
|
|
56
|
+
"""Return True if node is string concatenation, f-string, or .format() call."""
|
|
57
|
+
# BinOp with Add (e.g. "cmd " + arg)
|
|
58
|
+
if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
|
|
59
|
+
return True
|
|
60
|
+
# JoinedStr = f-string
|
|
61
|
+
if isinstance(node, ast.JoinedStr):
|
|
62
|
+
return True
|
|
63
|
+
# <expr>.format(...)
|
|
64
|
+
if isinstance(node, ast.Call):
|
|
65
|
+
if isinstance(node.func, ast.Attribute) and node.func.attr == "format":
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _get_func_name(call: ast.Call) -> tuple[str, str] | None:
|
|
71
|
+
"""Return (module, func) for subprocess.X calls, or None."""
|
|
72
|
+
func = call.func
|
|
73
|
+
if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
|
|
74
|
+
return func.value.id, func.attr
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _find_command_injections(tree: ast.AST, file_path: str) -> list[dict]:
    """Collect ``subprocess.*`` calls that run a dynamically built shell string.

    B4 (2026-04-23): accepts a pre-parsed tree so the meta-syntax-error
    path emits once per file in the caller.

    A call is reported when all of the following hold:

    * it is spelled ``subprocess.<func>`` with ``<func>`` in
      ``_SUBPROCESS_FUNCS``;
    * ``_is_shell_true`` accepts it;
    * its command is a string-building expression. The command is the
      first positional argument or, when there is none, the ``args=``
      keyword.

    Args:
        tree: Parsed module to scan.
        file_path: Path stored verbatim in each hit.

    Returns:
        One dict per hit with the keys ``call``, ``line`` and ``file``.
    """
    hits: list[dict] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        parts = _get_func_name(node)
        if parts is None:
            continue
        module, func = parts
        if module != "subprocess" or func not in _SUBPROCESS_FUNCS:
            continue
        if not _is_shell_true(node):
            continue
        # The command may be given positionally or as ``args=...``.
        if node.args:
            command = node.args[0]
        else:
            command = next(
                (kw.value for kw in node.keywords if kw.arg == "args"),
                None,
            )
        if command is not None and _is_string_building(command):
            hits.append({
                "call": f"subprocess.{func}",
                "line": getattr(node, "lineno", 0),
                "file": file_path,
            })
    return hits
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
# Helpers — path_traversal_hint
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
def _get_function_params(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
|
|
108
|
+
params: set[str] = set()
|
|
109
|
+
for arg in func_node.args.args + func_node.args.posonlyargs + func_node.args.kwonlyargs:
|
|
110
|
+
params.add(arg.arg)
|
|
111
|
+
if func_node.args.vararg:
|
|
112
|
+
params.add(func_node.args.vararg.arg)
|
|
113
|
+
if func_node.args.kwarg:
|
|
114
|
+
params.add(func_node.args.kwarg.arg)
|
|
115
|
+
return params
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _has_sanitizer(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
|
|
119
|
+
"""Return True if the function body contains resolve() + is_relative_to or '..' check."""
|
|
120
|
+
for node in ast.walk(func_node):
|
|
121
|
+
# .resolve() call
|
|
122
|
+
if isinstance(node, ast.Call):
|
|
123
|
+
if isinstance(node.func, ast.Attribute) and node.func.attr in {"resolve", "is_relative_to"}:
|
|
124
|
+
return True
|
|
125
|
+
# string literal ".." check — any Constant with ".." indicates manual validation
|
|
126
|
+
if isinstance(node, ast.Constant) and isinstance(node.value, str) and ".." in node.value:
|
|
127
|
+
return True
|
|
128
|
+
return False
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _is_os_path_join(callee: ast.expr) -> bool:
    """Return True if *callee* is literally the expression ``os.path.join``."""
    if not (isinstance(callee, ast.Attribute) and callee.attr == "join"):
        return False
    holder = callee.value
    return (
        isinstance(holder, ast.Attribute)
        and holder.attr == "path"
        and isinstance(holder.value, ast.Name)
        and holder.value.id == "os"
    )


def _find_path_traversal_hints(tree: ast.AST, file_path: str) -> list[dict]:
    """Collect path joins that use a taint-named parameter without a sanitizer.

    B4 (2026-04-23): accepts a pre-parsed tree so the meta-syntax-error
    path emits once per file in the caller.

    Every function that declares parameters and for which ``_has_sanitizer``
    is False is scanned for two patterns:

    * ``os.path.join(base, <param>, ...)`` -- any argument after the first
      is a parameter whose name matches ``_TAINT_PARAM_RE``;
    * ``<left> / <param>`` -- the right operand is such a parameter and the
      left operand mentions ``Path`` in its source text or contains a call
      to ``Path`` / ``<x>.Path``.

    A nested function is also visited as part of its enclosing function, so
    hits are de-duplicated by call kind, position and parameter name.

    Args:
        tree: Parsed module to scan.
        file_path: Path stored verbatim in each hit.

    Returns:
        One dict per hit with the keys ``call``, ``line``, ``file`` and
        ``param``. Within each function the ``os.path.join`` hits come
        before the ``/`` hits.
    """
    hits: list[dict] = []
    seen: set[tuple] = set()

    def _record(bucket: list[dict], call: str, node: ast.AST, param: str) -> None:
        line = getattr(node, "lineno", 0)
        key = (call, line, getattr(node, "col_offset", -1), param)
        if key in seen:
            return
        seen.add(key)
        bucket.append({"call": call, "line": line, "file": file_path, "param": param})

    for func_node in ast.walk(tree):
        if not isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        params = _get_function_params(func_node)
        if not params or _has_sanitizer(func_node):
            continue

        def _is_tainted_param(candidate: ast.expr) -> bool:
            return (
                isinstance(candidate, ast.Name)
                and candidate.id in params
                and _TAINT_PARAM_RE.search(candidate.id) is not None
            )

        join_hits: list[dict] = []
        div_hits: list[dict] = []
        for node in ast.walk(func_node):
            if isinstance(node, ast.Call):
                if not _is_os_path_join(node.func):
                    continue
                # The first argument is treated as the trusted base directory.
                for arg in node.args[1:]:
                    if _is_tainted_param(arg):
                        _record(join_hits, "os.path.join", node, arg.id)
            elif isinstance(node, ast.BinOp) and isinstance(node.op, ast.Div):
                if not _is_tainted_param(node.right):
                    continue
                left = node.left
                left_src = ast.unparse(left) if hasattr(ast, "unparse") else ""
                if "Path" in left_src or _involves_path_call(left):
                    _record(div_hits, "Path(...) / param", node, node.right.id)

        hits.extend(join_hits)
        hits.extend(div_hits)

    return hits
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _involves_path_call(node: ast.expr) -> bool:
|
|
206
|
+
"""Return True if the node tree contains a call to Path or pathlib.Path."""
|
|
207
|
+
for n in ast.walk(node):
|
|
208
|
+
if isinstance(n, ast.Call):
|
|
209
|
+
if isinstance(n.func, ast.Name) and n.func.id == "Path":
|
|
210
|
+
return True
|
|
211
|
+
if isinstance(n.func, ast.Attribute) and n.func.attr == "Path":
|
|
212
|
+
return True
|
|
213
|
+
return False
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
# Gate entry-point
|
|
218
|
+
# ---------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
def _command_injection_finding(normalized: str, hit: dict):
    """Build the ``security.command_injection`` finding for one detector hit."""
    lineno = hit["line"]
    call_name = hit["call"]
    return build_finding(
        check_id="security.command_injection",
        category=GateCategory.CONTRACT,
        title=f"Command injection risk: {call_name}(shell=True, <string-build>) at {normalized}:{lineno}",
        severity=GateSeverity.CRITICAL,
        impact=GateImpact.BLOCK,
        summary=(
            f"{normalized} line {lineno}: {call_name}() called with shell=True "
            "and a dynamically-built string (concatenation / f-string / .format). "
            "An attacker controlling any part of the string can inject arbitrary shell commands."
        ),
        recommendation=(
            "Pass a list of arguments instead of a shell string. "
            "Use subprocess.run([cmd, arg1, arg2], shell=False). "
            "Never build shell commands from untrusted input."
        ),
        evidence=[
            EvidenceReference(
                kind="file",
                path=normalized,
                detail=f"line:{lineno}",
            )
        ],
        repair_kind=RepairKind.REPLACE_WITH_FAIL_LOUD.value,
        executor_action=(
            "Pass list of args instead of shell string. "
            "Use subprocess.run([cmd, arg1, arg2], shell=False)"
        ),
        proof_required=(
            "no subprocess call with shell=True + string concatenation/f-string/format remains"
        ),
        allowlist_allowed=False,
    )


def _path_traversal_finding(normalized: str, hit: dict):
    """Build the ``security.path_traversal_hint`` finding for one detector hit."""
    lineno = hit["line"]
    call_name = hit["call"]
    param = hit.get("param", "?")
    return build_finding(
        check_id="security.path_traversal_hint",
        category=GateCategory.CONTRACT,
        title=f"Path traversal hint: {call_name} with unvalidated param '{param}' at {normalized}:{lineno}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"{normalized} line {lineno}: {call_name} receives function parameter '{param}' "
            "without visible '..' check or resolve()+is_relative_to() guard. "
            "User-controlled path components can escape the intended base directory."
        ),
        recommendation=(
            "Validate user-supplied path components: call .resolve() and verify "
            ".is_relative_to(base_dir), or explicitly reject paths containing '..'."
        ),
        evidence=[
            EvidenceReference(
                kind="file",
                path=normalized,
                detail=f"line:{lineno} param={param}",
            )
        ],
        repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
        executor_action=(
            f"Add path sanitization before {call_name}: "
            # f-prefix is required here so the parameter name is interpolated
            # instead of emitting a literal "{param}".
            f"resolved = Path(base, {param}).resolve(); "
            "assert resolved.is_relative_to(base_dir)"
        ),
        proof_required=(
            f"every {call_name} call with user-supplied '{param}' has resolve()+is_relative_to guard"
        ),
        allowlist_allowed=True,
    )


def run_security_injection_checks(ctx: PostExecGateContext):
    """Detect command injection and path traversal hints in changed source files.

    Each entry of ``ctx.changed_files_observed`` is normalised, skipped unless
    ``is_source_file`` accepts it, read as UTF-8 relative to
    ``ctx.project_dir`` and parsed once. Unreadable files are logged at debug
    level and skipped. Files that fail to parse are skipped after
    ``parse_python_source_or_emit_finding`` has been given the chance to
    append its own finding.

    Returns:
        The value of ``build_check_result`` for check id
        ``security_injection``, carrying every collected finding.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        abs_path = ctx.project_dir / normalized
        try:
            src = abs_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("security_injection_checks: cannot read %s: %s", normalized, exc)
            continue

        # B4 (2026-04-23): parse once so SyntaxError emits a single
        # meta.syntax_parse_error for this file.
        tree = parse_python_source_or_emit_finding(
            src,
            rel_path=normalized,
            emit_finding=findings.append,
            emitting_gate="security_injection",
        )
        if tree is None:
            continue

        # --- security.command_injection ---
        findings.extend(
            _command_injection_finding(normalized, hit)
            for hit in _find_command_injections(tree, normalized)
        )

        # --- security.path_traversal_hint ---
        findings.extend(
            _path_traversal_finding(normalized, hit)
            for hit in _find_path_traversal_hints(tree, normalized)
        )

    return build_check_result(
        check_id="security_injection",
        category=GateCategory.CONTRACT,
        findings=findings,
    )
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
6
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
7
|
+
from .common import build_check_result, build_finding
|
|
8
|
+
import logging
|
|
9
|
+
_log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
_STOPWORDS: frozenset[str] = frozenset({
|
|
13
|
+
"the", "and", "for", "with", "that", "this", "from", "into", "only", "must",
|
|
14
|
+
"should", "would", "could", "без", "для", "или", "что", "как", "это", "если",
|
|
15
|
+
"надо", "нужно", "задача", "task", "run", "verify", "verification",
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
_NO_EDIT_MARKERS: tuple[str, ...] = (
|
|
19
|
+
# English
|
|
20
|
+
"do not edit",
|
|
21
|
+
"no project file changes",
|
|
22
|
+
"do not change project files",
|
|
23
|
+
"without editing project files",
|
|
24
|
+
"discussion only",
|
|
25
|
+
"plan only",
|
|
26
|
+
"do not execute",
|
|
27
|
+
# Russian
|
|
28
|
+
"не редакт",
|
|
29
|
+
"не редактируй",
|
|
30
|
+
"не менять файлы",
|
|
31
|
+
"не меняй файлы",
|
|
32
|
+
"без изменений файлов",
|
|
33
|
+
"только обсуди",
|
|
34
|
+
"только план",
|
|
35
|
+
"не запускай исполнение",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def run_semantic_intent_checks(ctx: PostExecGateContext):
    """Compare the executor's handoff report with the original user request.

    Without a structured handoff report an empty ``semantic_intent`` result
    is returned. Otherwise up to three findings are emitted, in this order:

    * ``semantic.intent_not_reflected`` -- success is claimed, the request
      yields at least three keywords, and none of them occurs in the
      handoff text;
    * ``semantic.forbidden_edit_violation`` -- the request contains a
      no-edit marker but files outside the metadata directories changed;
    * ``semantic.success_with_unresolved_blockers`` -- success is claimed
      while blockers or uncertainties are listed.
    """
    handoff = ctx.structured_handoff
    report = None if handoff is None else handoff.report
    if report is None:
        return build_check_result(check_id="semantic_intent", category=GateCategory.SEMANTIC_INTENT)

    findings = []
    request_text = ctx.original_user_request.strip()
    handoff_sections = (
        report.task_understanding,
        " ".join(report.actions_taken),
        " ".join(report.verification_performed),
    )
    handoff_text = " ".join(handoff_sections).lower()
    request_keywords = _keywords(request_text)
    any_keyword_reflected = any(keyword in handoff_text for keyword in request_keywords)
    claims_success = report.result_claim == "success"

    # 1) Success claimed, yet no request keyword appears in the handoff.
    if claims_success and len(request_keywords) >= 3 and not any_keyword_reflected:
        keyword_evidence = EvidenceReference(
            kind="handoff",
            detail=", ".join(request_keywords[:8]),
        )
        findings.append(
            build_finding(
                check_id="semantic.intent_not_reflected",
                category=GateCategory.SEMANTIC_INTENT,
                title="Successful handoff does not reflect task intent keywords",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=(
                    "The executor claims success, but its handoff does not mention any high-signal "
                    "keywords from the original operator request."
                ),
                recommendation="Revise the handoff or implementation so the result explicitly addresses the requested objective.",
                evidence=[keyword_evidence],
                repair_kind='validate_boundary',
                executor_action='Address finding details',
                proof_required='Intent preserved',
                allowlist_allowed=False,
            )
        )

    # 2) The request forbade edits, yet non-metadata files changed.
    lower_request = request_text.lower()
    forbids_project_edits = False
    for marker in _NO_EDIT_MARKERS:
        if marker in lower_request:
            forbids_project_edits = True
            break
    metadata_prefixes = (".a1/", ".cortex/", ".claude/", ".prompt-engineer/")
    non_metadata_changes = tuple(
        changed
        for changed in ctx.changed_files_observed
        if not changed.replace("\\", "/").startswith(metadata_prefixes)
    )
    if forbids_project_edits and non_metadata_changes:
        changed_evidence = [
            EvidenceReference(kind="changed_file", path=changed)
            for changed in non_metadata_changes[:5]
        ]
        findings.append(
            build_finding(
                check_id="semantic.forbidden_edit_violation",
                category=GateCategory.SEMANTIC_INTENT,
                title="Executor changed files despite a no-edit task constraint",
                severity=GateSeverity.CRITICAL,
                impact=GateImpact.BLOCK,
                summary="The original request prohibited project-file edits, but observed changed-file evidence includes project files.",
                recommendation="Revert or explain the unauthorized project-file changes and rerun through the control plane.",
                evidence=changed_evidence,
                repair_kind='validate_boundary',
                executor_action='Address finding details',
                proof_required='Intent preserved',
                allowlist_allowed=False,
            )
        )

    # 3) Success claimed while blockers or uncertainties remain listed.
    if claims_success and (report.blockers or report.uncertainties):
        findings.append(
            build_finding(
                check_id="semantic.success_with_unresolved_blockers",
                category=GateCategory.SEMANTIC_INTENT,
                title="Handoff claims success while listing blockers or uncertainties",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary="The executor result_claim is success, but the handoff still contains blockers or uncertainties.",
                recommendation="Downgrade the claim to partial/failed or resolve the listed blockers before success.",
                repair_kind='validate_boundary',
                executor_action='Address finding details',
                proof_required='Intent preserved',
                allowlist_allowed=False,
            )
        )

    return build_check_result(
        check_id="semantic_intent",
        category=GateCategory.SEMANTIC_INTENT,
        findings=findings,
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _keywords(text: str) -> tuple[str, ...]:
|
|
132
|
+
words = re.findall(r"[A-Za-zА-Яа-я0-9_]{4,}", text.lower())
|
|
133
|
+
result: list[str] = []
|
|
134
|
+
for word in words:
|
|
135
|
+
if word in _STOPWORDS:
|
|
136
|
+
continue
|
|
137
|
+
if word not in result:
|
|
138
|
+
result.append(word)
|
|
139
|
+
return tuple(result[:12])
|