vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from vigil_forensic._shared import is_executor_metadata_path
|
|
4
|
+
from vigil_forensic._shared import GateCategory, GateImpact, GateSeverity
|
|
5
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
6
|
+
from .common import build_check_result, build_finding
|
|
7
|
+
import logging
|
|
8
|
+
_log = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Substrings that mark a verification entry as a test invocation or test output.
_TEST_EXECUTION_MARKERS = frozenset({"pytest", "python -m pytest", "test_", "passed", "failed", "error"})

# Substrings that mark a verification entry as reporting a failing test run.
_FAILURE_MARKERS = frozenset({"failed", "error"})

# Words that, when they directly precede a failure marker, make it a zero
# count ("0 failed", "no errors") rather than an actual failure.
_ZERO_COUNT_WORDS = frozenset({"0", "no", "zero"})


def _entry_reports_failure(entry: str) -> bool:
    """Return True when *entry* mentions a failure marker that is not a zero count.

    *entry* is expected to be lower-cased already.  A marker is ignored only
    when the word immediately before it is a zero-count word, so summaries
    such as ``"5 passed, 0 failed, 0 errors"`` are not treated as failures.
    Other phrasings (e.g. ``"0 tests failed"``) are still counted as failures.
    """
    words = entry.replace(",", " ").split()
    for index, word in enumerate(words):
        if not any(marker in word for marker in _FAILURE_MARKERS):
            continue
        if index and words[index - 1] in _ZERO_COUNT_WORDS:
            continue
        return True
    return False


def run_testing_checks(ctx: PostExecGateContext):
    """Evaluate whether critical-file changes are backed by test evidence.

    Reads ``ctx.repo_profile``, ``ctx.touched_files``, ``ctx.tests_touched``
    and the structured handoff report, and may emit these findings:

    * ``testing.missing_critical_tests`` - critical paths touched but no test
      files were touched or no verification entries were recorded.
    * ``testing.behavior_mismatch`` - verification text mentions none of the
      keywords derived from the touched paths.
    * ``testing.no_test_execution_evidence`` - no verification entry contains
      a test-execution marker.
    * ``testing.insufficient_verification_scope`` - more than three critical
      files touched with at most one verification entry.
    * ``testing.contradictory_test_evidence`` - the evidence reports failures
      while the handoff claims success.

    Returns the result of ``build_check_result`` for check id ``"testing"``;
    when no repo profile is available the result carries no findings.
    """
    findings = []
    profile = ctx.repo_profile
    if profile is None:
        return build_check_result(check_id="testing", category=GateCategory.TESTING)

    touched = tuple(str(path) for path in ctx.touched_files)
    # Classify every touched path once; both the boolean and the count are
    # derived from the same pass.
    critical_touched_count = sum(1 for path in touched if profile.is_critical(path))
    changed_critical = critical_touched_count > 0

    report = (
        ctx.structured_handoff.report
        if ctx.structured_handoff is not None and ctx.structured_handoff.report is not None
        else None
    )
    # Lower-case once so every marker/keyword comparison below is
    # case-insensitive; blank entries are dropped.  ``or ()`` tolerates a
    # report whose verification_performed is None.
    verification_performed = (
        tuple(
            str(item).lower()
            for item in (report.verification_performed or ())
            if str(item).strip()
        )
        if report is not None
        else ()
    )
    test_text = " ".join(verification_performed)

    touched_non_metadata = tuple(path for path in touched if not is_executor_metadata_path(path))
    expected_keywords: set[str] = set()
    for path in touched_non_metadata:
        normalized = path.replace("\\", "/").lower()
        if "runtime" in normalized:
            expected_keywords.update({"runtime", "lock", "run_controls", "doctor", "status"})
        if "dashboard" in normalized:
            expected_keywords.update({"dashboard", "session", "files", "route"})
        if "policy" in normalized:
            expected_keywords.update({"policy", "hook", "enforcement"})
        if "gate" in normalized or "review" in normalized:
            expected_keywords.update({"gate", "review", "control_plane"})

    has_test_file_evidence = bool(ctx.tests_touched)
    has_behavior_test_evidence = bool(expected_keywords) and any(
        keyword in test_text for keyword in expected_keywords
    )

    if changed_critical and (not has_test_file_evidence or not verification_performed):
        findings.append(
            build_finding(
                check_id="testing.missing_critical_tests",
                category=GateCategory.TESTING,
                title="Critical behavior changed without test coverage evidence",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary="Touched critical roots but no meaningful test file changes or verification commands were recorded.",
                recommendation="Add or update tests and record the executed verification commands in the handoff.",

                repair_kind='add_test',
                executor_action='Add tests for coverage',
                proof_required='Tests added/passing',
                allowlist_allowed=True,
            )
        )
    elif changed_critical and expected_keywords and not has_behavior_test_evidence:
        findings.append(
            build_finding(
                check_id="testing.behavior_mismatch",
                category=GateCategory.TESTING,
                title="Recorded test evidence does not match the changed critical behavior",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=(
                    f"Critical paths were touched ({', '.join(touched_non_metadata[:3])}), "
                    f"but verification evidence did not reference expected behavior keywords: {', '.join(sorted(expected_keywords)[:5])}."
                ),
                recommendation="Run behavior-relevant tests or operator flows that exercise the touched critical path.",

                repair_kind='add_test',
                executor_action='Add tests for coverage',
                proof_required='Tests added/passing',
                allowlist_allowed=True,
            )
        )

    has_test_execution_evidence = any(
        any(marker in entry for marker in _TEST_EXECUTION_MARKERS)
        for entry in verification_performed
    )
    if changed_critical and not has_test_execution_evidence:
        findings.append(
            build_finding(
                check_id="testing.no_test_execution_evidence",
                category=GateCategory.TESTING,
                title="No test execution evidence found for critical file changes",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary="Critical files were touched but verification_performed contains no test execution markers (pytest, passed, failed, error).",
                recommendation="Run the test suite and record the output in verification_performed.",

                repair_kind='add_test',
                executor_action='Add tests for coverage',
                proof_required='Tests added/passing',
                allowlist_allowed=True,
            )
        )
    if critical_touched_count > 3 and len(verification_performed) <= 1:
        findings.append(
            build_finding(
                check_id="testing.insufficient_verification_scope",
                category=GateCategory.TESTING,
                title="Verification scope is too narrow for the number of critical files changed",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=f"{critical_touched_count} critical files were touched but only {len(verification_performed)} verification entries recorded.",
                recommendation="Provide proportional verification coverage: add per-module test runs or behavioral checks for each critical area changed.",

                repair_kind='add_test',
                executor_action='Add tests for coverage',
                proof_required='Tests added/passing',
                allowlist_allowed=True,
            )
        )

    # SL-6: ExecutorHandoffAssessment has no canonical status field in Sprint A.
    # Use report.result_claim as the truthful success signal: parse_executor_handoff_report
    # restricts result_claim to {"success", "partial", "failed"} and "success" is the
    # only claim that should trigger the contradictory-test-evidence finding.
    handoff_claims_success = (
        report is not None and str(report.result_claim or "").lower() == "success"
    )
    # Zero counts such as "0 failed" are not failures; a plain substring test
    # would flag every clean pytest-style summary as contradictory.
    has_failure_in_evidence = any(
        _entry_reports_failure(entry) for entry in verification_performed
    )
    if has_test_execution_evidence and has_failure_in_evidence and handoff_claims_success:
        # NOTE(review): the repair metadata below talks about adding tests,
        # while this finding is about failing tests - confirm it is intended.
        findings.append(
            build_finding(
                check_id="testing.contradictory_test_evidence",
                category=GateCategory.TESTING,
                title="Test evidence contains failures but handoff claims success",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary="verification_performed includes test execution with failure/error markers, but the handoff status reports success.",
                recommendation="Resolve all test failures before marking the handoff as successful.",

                repair_kind='add_test',
                executor_action='Add tests for coverage',
                proof_required='Tests added/passing',
                allowlist_allowed=True,
            )
        )
    return build_check_result(check_id="testing", category=GateCategory.TESTING, findings=findings)
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
"""TOCTOU check-then-act detector (Finding G.1 plan v7).
|
|
2
|
+
|
|
3
|
+
Detects Time-Of-Check-To-Time-Of-Use patterns where a resource-existence
|
|
4
|
+
check (exists, is_port_in_use, etc.) is immediately followed by a mutation
|
|
5
|
+
(write_text, unlink, socket.bind, etc.) without an atomic guard (with-block
|
|
6
|
+
or explicit acquire_atomic call) in between.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import ast
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity, RepairKind
|
|
14
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
15
|
+
from ..source_analysis import is_source_file
|
|
16
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
17
|
+
|
|
18
|
+
_log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Call names treated as a resource-existence check.  _find_check_call compares
# the name returned by _get_call_name() for every call in a statement against
# this set.
CHECK_FUNCS = frozenset({
    "exists",
    "is_file",
    "is_dir",
    "is_port_in_use",
    "is_running",
    "is_pid_alive",
    "is_alive",
})

# Call names treated as a mutation of the resource.  _find_mutation_call
# compares the name returned by _get_call_name() against this set.
MUTATION_FUNCS = frozenset({
    "write_text",
    "write_bytes",
    "unlink",
    "rename",
    "replace",
    "mkdir",
    "rmdir",
    "rmtree",
    "copy",
    "copy2",
    "move",
    "bind",
})

# Call names that indicate the pattern is guarded atomically.  A look-ahead
# statement containing any of these ends the scan for the current check.
ATOMIC_HINTS = frozenset({
    "acquire_atomic",
    "acquire_atomic_with_atexit",
    "acquire",
})

# Leftmost receiver names recognised as path-handling modules.  When a call
# looks like ``<module>.func('/path', ...)`` we treat the first positional
# string arg as the resource path.  Keep this list conservative to avoid
# mis-extracting payloads from method calls on user objects.
_MODULE_NAMES = frozenset({
    "os",
    "path",
    "pathlib",
    "shutil",
    "io",
})

# How many subsequent top-level statements to inspect after a check-call
# when looking for a mutation on the same resource.
_LOOKAHEAD = 10
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
# Internal helpers
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
from .atomic_write_checks import _get_call_name
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _get_receiver_var(node: ast.Call) -> str | None:
    """Return the bare variable name a method is called on, else None.

    Only the ``name.method(...)`` shape yields a result; free-function calls
    and calls whose receiver is anything other than a plain ``ast.Name``
    (attribute chains, call results, subscripts, ...) return None.
    """
    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return None
    owner = callee.value
    return owner.id if isinstance(owner, ast.Name) else None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _first_literal_str_arg(call: ast.Call) -> str | None:
    """Return the first positional argument of *call* if it is a str constant."""
    if call.args:
        first = call.args[0]
        if isinstance(first, ast.Constant) and isinstance(first.value, str):
            return first.value
    return None


def _extract_literal_path(call_node: ast.Call) -> str | None:
    """Return a string literal that identifies the resource being operated on.

    Three call shapes are recognised, in this order of priority:

    1. Method-on-constructor - ``Path('/tmp/x').exists()``,
       ``Path('/tmp/x').write_text('data')``:
       the receiver is itself a ``Call`` and the literal is taken from the
       *receiver's* first positional argument.  The outer call's arguments
       are never used here, because they are typically payload (``'data'``),
       not a path.

    2. Module-qualified function - ``os.unlink('/tmp/x')``,
       ``os.path.exists('/tmp/x')``:
       the attribute chain is followed to its leftmost ``Name``; when that
       name is in ``_MODULE_NAMES`` the call's first positional string
       argument is the path.  Bound-method calls on ordinary variables
       (``p.write_text('data')``) yield None so payload is never mistaken
       for a path.

    3. Plain free function - ``exists('/tmp/x')``, ``open('/tmp/x', 'w')``:
       the first positional string argument is the path.

    Returns None when no string literal can be attributed to the resource.
    """
    func = call_node.func

    if isinstance(func, ast.Attribute):
        receiver = func.value

        # Shape 1: the receiver call carries the path literal.  If it has no
        # literal first argument the resource is unknown; do not fall through
        # to the outer arguments (they are payload for the method).
        if isinstance(receiver, ast.Call):
            return _first_literal_str_arg(receiver)

        # Shape 2: walk ``a.b.c`` down to ``a`` so that dotted module paths
        # such as ``os.path`` are recognised, not only single-name modules.
        root = receiver
        while isinstance(root, ast.Attribute):
            root = root.value
        if isinstance(root, ast.Name) and root.id in _MODULE_NAMES:
            return _first_literal_str_arg(call_node)

        # Bound method on a plain variable / unknown receiver -> no literal.
        return None

    # Shape 3: plain free function call.
    if isinstance(func, ast.Name):
        return _first_literal_str_arg(call_node)

    return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _collect_alias_assignments(
    func_node: ast.FunctionDef | ast.AsyncFunctionDef,
) -> dict[str, int]:
    """Map each plainly-assigned variable name to its statement index.

    Scans only the direct statements of the function body and only plain
    ``ast.Assign`` nodes (augmented and annotated assignments are ignored).
    Every target that is a bare ``Name`` is recorded; when a name is assigned
    more than once, the index of the last assignment wins.
    """
    return {
        target.id: position
        for position, statement in enumerate(func_node.body)
        if isinstance(statement, ast.Assign)
        for target in statement.targets
        if isinstance(target, ast.Name)
    }
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _find_check_call(stmt: ast.stmt) -> dict | None:
    """Describe the first CHECK_FUNCS call found anywhere inside *stmt*.

    Returns a dict with keys ``func``, ``var``, ``literal``, ``line`` and
    ``call_node`` for the first matching call in ``ast.walk`` order, or None
    when the statement contains no such call.
    """
    call_nodes = (
        candidate for candidate in ast.walk(stmt) if isinstance(candidate, ast.Call)
    )
    for call in call_nodes:
        called = _get_call_name(call)
        if called not in CHECK_FUNCS:
            continue
        return {
            "func": called,
            "var": _get_receiver_var(call),
            "literal": _extract_literal_path(call),
            "line": call.lineno,
            "call_node": call,
        }
    return None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _find_mutation_call(stmt: ast.stmt) -> dict | None:
    """Describe the first MUTATION_FUNCS call found anywhere inside *stmt*.

    Returns a dict with keys ``func``, ``var``, ``literal``, ``line`` and
    ``call_node`` for the first matching call in ``ast.walk`` order, or None
    when the statement contains no such call.
    """
    for candidate in ast.walk(stmt):
        if not isinstance(candidate, ast.Call):
            continue
        called = _get_call_name(candidate)
        if called not in MUTATION_FUNCS:
            continue
        description = {"func": called}
        description["var"] = _get_receiver_var(candidate)
        description["literal"] = _extract_literal_path(candidate)
        description["line"] = candidate.lineno
        description["call_node"] = candidate
        return description
    return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _has_atomic_call(stmt: ast.stmt) -> bool:
    """Report whether any call inside *stmt* is named in ATOMIC_HINTS."""
    return any(
        isinstance(candidate, ast.Call) and _get_call_name(candidate) in ATOMIC_HINTS
        for candidate in ast.walk(stmt)
    )
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _same_resource(
    check: dict,
    mutation: dict,
    alias_assignments: dict[str, int] | None = None,
) -> bool:
    """Decide whether a check call and a mutation call target one resource.

    Two signals are consulted, in order:

    1. Literal match - both descriptions carry a non-None ``"literal"`` and
       the two strings are equal, e.g.::

           Path("/tmp/x").exists()  ->  Path("/tmp/x").write_text(...)

    2. Receiver match - both descriptions carry a non-empty ``"var"`` and
       the names are equal, e.g.::

           p.exists()  ->  p.write_text(...)

    ``alias_assignments`` is accepted for interface compatibility but is not
    consulted: two *different* variable names are never treated as aliases,
    because proving that would need value analysis beyond the AST.  Anything
    that cannot be confirmed as the same resource yields False.
    """
    check_literal, mutation_literal = check.get("literal"), mutation.get("literal")
    both_literals_known = check_literal is not None and mutation_literal is not None
    if both_literals_known and check_literal == mutation_literal:
        return True

    check_var, mutation_var = check.get("var"), mutation.get("var")
    if not (check_var and mutation_var):
        # At least one side has no usable receiver name -> stay conservative.
        return False
    return check_var == mutation_var
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _detect_toctou_in_function(
    func_node: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
) -> list[dict]:
    """Scan the top-level statements of one function for check-then-act pairs.

    For every statement containing a CHECK_FUNCS call, up to ``_LOOKAHEAD``
    following top-level statements are inspected.  The scan for that check
    stops at the first ``with`` / ``async with`` statement or atomic-hint
    call (both treated as a guard), or at the first mutation on the same
    resource, which is recorded.

    Args:
        func_node: The function definition whose direct body is scanned.
        file_path: Path stored in each hit under the ``"file"`` key.

    Returns:
        A list of dicts with keys ``file``, ``check_func``, ``check_line``,
        ``mut_func`` and ``mut_line``; at most one hit per check statement.
    """
    raw: list[dict] = []
    body = func_node.body
    alias_assignments = _collect_alias_assignments(func_node)
    for i, stmt in enumerate(body):
        check_info = _find_check_call(stmt)
        if not check_info:
            continue
        for look_ahead in body[i + 1: i + 1 + _LOOKAHEAD]:
            # A with-block implies context manager / lock -> safe.  Coroutines
            # express the same guard as ``async with``, which is a separate
            # AST node type and must be accepted too.
            if isinstance(look_ahead, (ast.With, ast.AsyncWith)):
                break
            # An explicit atomic-hint call -> safe
            if _has_atomic_call(look_ahead):
                break
            mut_info = _find_mutation_call(look_ahead)
            if mut_info and _same_resource(check_info, mut_info, alias_assignments):
                raw.append({
                    "file": file_path,
                    "check_func": check_info["func"],
                    "check_line": check_info["line"],
                    "mut_func": mut_info["func"],
                    "mut_line": mut_info["line"],
                })
                # One finding per check is enough; move on to the next check.
                break
    return raw
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
# Public gate entry-point
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
def run_toctou_check_then_act(ctx: PostExecGateContext):
    """Detect TOCTOU check-then-act races in changed source files.

    For each path in ctx.changed_files_observed accepted by is_source_file:
      1. Read the file as UTF-8 and parse it with ``ast.parse``.
      2. Walk all function defs (including nested ones).
      3. Within each function body, detect a check-call followed (within
         _LOOKAHEAD statements) by a mutation on the same resource, with no
         intervening with-block or atomic-hint call.

    Fail-open: unreadable or unparsable files -> DEBUG log, skip, never raise.

    Returns the result of ``build_check_result`` for check id
    ``"toctou_check_then_act"`` carrying one finding per detected pair.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        abs_path = ctx.project_dir / normalized
        try:
            src = abs_path.read_text(encoding="utf-8")
            tree = ast.parse(src)
        except (OSError, SyntaxError, ValueError, RecursionError) as exc:
            # ValueError covers UnicodeDecodeError (its subclass) and the
            # "source contains null bytes" error raised by the compiler on
            # older interpreters; RecursionError covers pathologically nested
            # sources.  All of them must be skipped to honour fail-open.
            _log.debug("toctou_check: failed to parse %s: %s", normalized, exc)
            continue

        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                raw_hits = _detect_toctou_in_function(node, normalized)
                for hit in raw_hits:
                    findings.append(
                        build_finding(
                            check_id="toctou_check_then_act.race_window",
                            category=GateCategory.RUNTIME_BEHAVIOR,
                            title="TOCTOU race: non-atomic check-then-act on shared resource",
                            severity=GateSeverity.MEDIUM,
                            impact=GateImpact.REVISE,
                            summary=(
                                f"{hit['file']}: {hit['check_func']}() at line "
                                f"{hit['check_line']} followed by {hit['mut_func']}() "
                                f"at line {hit['mut_line']} without atomic guard -- "
                                "another process may alter the resource between check and act."
                            ),
                            recommendation=(
                                "Wrap the check+act sequence in a context manager or use an "
                                "atomic operation (e.g. open(..., 'x'), os.replace, "
                                "acquire_atomic) to eliminate the race window."
                            ),
                            evidence=[
                                EvidenceReference(
                                    kind="file",
                                    path=hit["file"],
                                    detail=(
                                        f"check={hit['check_func']}:L{hit['check_line']} "
                                        f"mutation={hit['mut_func']}:L{hit['mut_line']}"
                                    ),
                                )
                            ],
                            repair_kind=RepairKind.ADD_BOUNDARY_CHECK.value,
                            executor_action="Add check-then-act guard",
                            proof_required="TOCTOU pattern fixed",
                            allowlist_allowed=False,
                        )
                    )

    return build_check_result(
        check_id="toctou_check_then_act",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=findings,
    )
|