vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,842 @@
|
|
|
1
|
+
"""Edit, mutation, and static scan clusters 10-17.
|
|
2
|
+
|
|
3
|
+
Clusters:
|
|
4
|
+
10 - Edit Consistency (LLM-specific)
|
|
5
|
+
11 - Mutation Without Verification (LLM-specific)
|
|
6
|
+
12 - Security Patterns
|
|
7
|
+
13 - Test Quality
|
|
8
|
+
14 - Import Cycles
|
|
9
|
+
15 - Roundtrip Consistency
|
|
10
|
+
16 - Shared Mutable State
|
|
11
|
+
17 - Dependency Vulnerabilities
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations

import ast
import hashlib
import logging
import re
from pathlib import Path
from typing import Optional

from .core import detect_language, _insufficient_evidence_finding
from ...gate_models import (
    EvidenceReference,
    GateCategory,
    GateFinding,
    GateImpact,
    GateSeverity,
    RepairKind,
)
from ..common import build_finding
from .._ast_helpers import collect_string_constant_line_ranges
|
|
32
|
+
_log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Cluster 10: Edit Consistency (LLM-specific)
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _edit_location_path(location: str) -> str:
    """Return the file part of a ``"file:line"`` location key.

    Everything from the first ``:`` onwards is dropped, except that a leading
    Windows drive prefix (``C:\\`` or ``C:/``) is kept intact so that
    ``C:\\src\\a.py:12`` yields ``C:\\src\\a.py`` rather than ``C``.
    A location without any ``:`` is returned unchanged.
    """
    drive = ""
    rest = location
    has_drive_prefix = (
        len(location) >= 3
        and location[0].isalpha()
        and location[1] == ":"
        and location[2] in "\\/"
    )
    if has_drive_prefix:
        # The colon at index 1 belongs to the drive, not to the line suffix.
        drive, rest = location[:2], location[2:]
    return drive + rest.split(":", 1)[0]


def assess_edit_consistency(
    instances: dict[str, str],
    expected_pattern: str,
) -> list[GateFinding]:
    """Cluster 10: All instances that should match a pattern actually match.

    Catches LLM Edit Amnesia: fix one occurrence, miss others.

    Args:
        instances: ``{"file:line": "actual_code", ...}``.
        expected_pattern: regex that every instance's code should match
            (``re.search`` semantics, i.e. a match anywhere in the code).

    Returns:
        One MEDIUM/REVISE finding per instance whose code does not match;
        an empty list when ``instances`` is empty or everything matches.

    Raises:
        re.error: if ``expected_pattern`` is not a valid regex and
            ``instances`` is non-empty.
    """
    import re

    if not instances:
        return []  # NOT_APPLICABLE

    # Compile once instead of re-resolving the pattern for every instance.
    matcher = re.compile(expected_pattern)

    findings: list[GateFinding] = []
    for location, code in instances.items():
        if matcher.search(code):
            continue
        findings.append(build_finding(
            check_id="edit_consistency",
            category=GateCategory.DRIFT,
            title=f"[edit_consistency] {location}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=f"Instance at {location} DOES NOT match expected pattern",
            recommendation=f"Fix inconsistent edit at {location} to match pattern: {expected_pattern}",
            evidence=(EvidenceReference(
                kind="probe",
                path=_edit_location_path(location),
                detail="DOES NOT match pattern",
                ok=False,
            ),),
            repair_kind=RepairKind.EDIT_CANONICAL.value,
            executor_action=f"Fix inconsistent edit at {location}",
        ))
    return findings
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Cluster 11: Mutation Without Verification (LLM-specific)
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _mutation_unverified_finding(
    file_path: str,
    summary: str,
    recommendation: str,
    detail: str,
) -> GateFinding:
    """Build the HIGH/REVISE ``mutation_verified`` finding shared by all failure paths."""
    return build_finding(
        check_id="mutation_verified",
        category=GateCategory.DRIFT,
        title=f"[mutation_unverified] {file_path}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=summary,
        recommendation=recommendation,
        evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
        repair_kind=RepairKind.ADD_REGRESSION_TEST.value,
        executor_action=f"Re-apply mutation to {file_path}",
    )


def assess_mutation_verified(
    file_path: str,
    expected_content_hash: str,
    actual_content_hash: Optional[str] = None,
    project_dir: Optional[Path] = None,
) -> list[GateFinding]:
    """Cluster 11: After mutation, verify the file actually contains expected content.

    Catches LLM Stale Reference: edit applied mentally but not on disk (stash drop,
    parallel agent overwrite, linter revert).

    Sprint C3 (2026-04-25): when ``project_dir`` is supplied, relative
    ``file_path`` values resolve against it instead of CWD. The runner stores
    snapshot keys in repo-relative form (see ``normalize_path``), so without
    the explicit anchor a "file missing" finding fired on files that exist on
    disk simply because the gate's CWD differed from the project root.

    Args:
        file_path: path of the mutated file (absolute, or relative to
            ``project_dir`` / CWD).
        expected_content_hash: SHA-256 hex digest the file should have.
        actual_content_hash: digest to compare against; when ``None`` it is
            computed from the bytes on disk.
        project_dir: anchor for relative ``file_path`` values.

    Returns:
        ``[]`` when the hashes are equal; otherwise a single HIGH finding
        describing a missing file, an unreadable file, or diverged content.
    """
    if actual_content_hash is None:
        # Compute from disk. Resolve relative paths against project_dir when
        # the caller supplies it; absolute paths pass through unchanged.
        candidate = Path(file_path)
        if project_dir is not None and not candidate.is_absolute():
            target = (Path(project_dir) / candidate).resolve()
        else:
            target = candidate
        # Read first and classify the failure afterwards: an exists() probe
        # followed by read_bytes() crashes on directories, unreadable files,
        # and files removed between the two calls.
        try:
            payload = target.read_bytes()
        except (FileNotFoundError, NotADirectoryError):
            return [_mutation_unverified_finding(
                file_path,
                summary=f"Mutated file missing: {file_path}",
                recommendation="Verify the file was actually written to disk.",
                detail="File does not exist after mutation",
            )]
        except OSError as exc:
            return [_mutation_unverified_finding(
                file_path,
                summary=f"Mutated file unreadable: {file_path}",
                recommendation="Verify the file was actually written to disk.",
                detail=f"File could not be read after mutation: {type(exc).__name__}",
            )]
        actual_content_hash = hashlib.sha256(payload).hexdigest()

    if expected_content_hash != actual_content_hash:
        return [_mutation_unverified_finding(
            file_path,
            summary=f"File content DIVERGED from expected: {file_path}",
            recommendation="Re-apply the intended mutation; content was overwritten or reverted.",
            detail=f"MISMATCH: expected {expected_content_hash[:16]}..., got {actual_content_hash[:16]}...",
        )]
    return []  # PASS
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
# Cluster 12: Security Patterns
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# (regex, description) pairs for the line-based regex scan. Each regex is
# applied to one source line at a time by ``assess_security_patterns``.
_SECURITY_DANGEROUS_PATTERNS: tuple[tuple[str, str], ...] = (
    (r"\beval\s*\(", "eval() usage -- potential code injection"),
    (r"\bexec\s*\(", "exec() usage -- potential code injection"),
    (r"\bos\.system\s*\(", "os.system() -- prefer subprocess with shell=False"),
    (r"shell\s*=\s*True", "subprocess with shell=True -- command injection risk"),
    (r"__import__\s*\([^'\"]", "dynamic __import__() with non-literal -- potential injection vector"),
    (r"\bpickle\.loads?\s*\(", "pickle deserialization -- arbitrary code execution risk"),
    (r"\byaml\.load\s*\((?!.*Loader)", "yaml.load without SafeLoader -- code execution risk"),
)

# JavaScript / TypeScript counterpart of the table above.
_SECURITY_JS_PATTERNS: tuple[tuple[str, str], ...] = (
    (r"\beval\s*\(", "eval() usage -- potential code injection"),
    (r"\bnew\s+Function\s*\(", "new Function() -- dynamic code execution"),
    # Match `=` and `+=` assignments only; the negative lookahead keeps
    # `==` / `===` comparisons from being reported as assignments.
    (r"innerHTML\s*\+?=(?!=)", "innerHTML assignment -- XSS risk"),
    (r"document\.write\s*\(", "document.write -- XSS risk"),
    (r"dangerouslySetInnerHTML", "React dangerouslySetInnerHTML -- XSS risk"),
)
|
|
162
|
+
|
|
163
|
+
# First character of a SQL target: an identifier, a quoted identifier, or an
# interpolation placeholder (`{...}` from f-string / .format templates, `%s` /
# `%(name)s` from %-format templates).
_SQL_TARGET_START = r'[\w"`\[{%]'

# A whole table name in the middle of a clause (UPDATE <ident> SET ...):
# plain / dotted / quoted identifier, or one interpolation placeholder.
_SQL_IDENT = r'(?:[\w."`\[\]]+|\{[^{}]*\}|%(?:\(\w+\))?s)'

# SQL-clause structure: must look like an actual SQL statement (keyword + target
# identifier + structural follow-on), not a verb in prose. Checked against the
# literal template of an f-string / .format() / %-format string, *not* a raw line.
#
# Placeholders are accepted wherever an identifier is expected: an interpolated
# table or column name is exactly the dynamic SQL this rule looks for, and the
# f-string template helper renders every interpolation as " {} ".
_SQL_STRUCTURE_RE = re.compile(
    r"(?i)\b("
    r'SELECT\s+[\w*,\s().{}%"`\[\]]+\s+FROM\s+' + _SQL_TARGET_START
    + r"|INSERT\s+INTO\s+" + _SQL_TARGET_START
    + r"|UPDATE\s+" + _SQL_IDENT + r"\s+SET\s+" + _SQL_TARGET_START
    + r"|DELETE\s+FROM\s+" + _SQL_TARGET_START
    + r"|DROP\s+TABLE\s+" + _SQL_TARGET_START
    + r"|CREATE\s+(?:TABLE|INDEX|VIEW)\s+" + _SQL_TARGET_START
    + r"|ALTER\s+TABLE\s+" + _SQL_TARGET_START
    + r")",
)

# Minimum literal length to plausibly contain a real query (keyword + target + clause).
_MIN_SQL_QUERY_LEN = 20

# DB-call attribute names: literal must be inside one of these calls on a
# Python AST for the SQL-injection rule to fire.
_DB_CALL_ATTRS: frozenset[str] = frozenset({
    "execute", "executemany", "executescript", "query", "raw",
})
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _py_fstring_template(node: ast.JoinedStr) -> tuple[str, bool]:
    """Return ``(template, has_interpolation)`` for an f-string node.

    Literal string segments are kept verbatim; every ``{...}`` interpolation
    is rendered as the fixed placeholder ``" {} "`` so neighbouring words stay
    separated. Segments of any other kind are ignored.
    """
    fragments: list[str] = []
    interpolated = False
    for segment in node.values:
        if isinstance(segment, ast.FormattedValue):
            interpolated = True
            fragments.append(" {} ")  # placeholder to preserve spacing
            continue
        if isinstance(segment, ast.Constant) and isinstance(segment.value, str):
            fragments.append(segment.value)
    return "".join(fragments), interpolated
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _py_fstring_is_sql(node: ast.JoinedStr) -> bool:
    """True if the f-string interpolates a value into a SQL-shaped template.

    All three conditions must hold: at least one interpolation, a template of
    at least ``_MIN_SQL_QUERY_LEN`` characters, and a ``_SQL_STRUCTURE_RE`` hit.
    """
    literal, interpolated = _py_fstring_template(node)
    if interpolated and len(literal) >= _MIN_SQL_QUERY_LEN:
        return _SQL_STRUCTURE_RE.search(literal) is not None
    return False
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _py_format_call_is_sql(node: ast.Call) -> bool:
    """True if ``"<literal>".format(...)`` with SQL-clause structure in the literal.

    Anything other than a ``.format`` call made directly on a string constant
    is rejected, as is a literal shorter than ``_MIN_SQL_QUERY_LEN``.
    """
    callee = node.func
    if not (isinstance(callee, ast.Attribute) and callee.attr == "format"):
        return False
    receiver = callee.value
    if not isinstance(receiver, ast.Constant):
        return False
    text = receiver.value
    if not isinstance(text, str):
        return False
    if len(text) < _MIN_SQL_QUERY_LEN:
        return False
    return _SQL_STRUCTURE_RE.search(text) is not None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _py_pct_format_is_sql(node: ast.BinOp) -> bool:
    """True if ``"<literal with %s>" % (...)`` with SQL-clause structure.

    The left operand must be a string constant containing ``%s`` or ``%(``,
    be at least ``_MIN_SQL_QUERY_LEN`` characters long, and match
    ``_SQL_STRUCTURE_RE``.
    """
    if not isinstance(node.op, ast.Mod):
        return False
    template_node = node.left
    is_str_literal = isinstance(template_node, ast.Constant) and isinstance(template_node.value, str)
    if not is_str_literal:
        return False
    text = template_node.value
    # Without a %s / %(name) placeholder nothing is being interpolated.
    if not any(marker in text for marker in ("%s", "%(")):
        return False
    if len(text) < _MIN_SQL_QUERY_LEN:
        return False
    return _SQL_STRUCTURE_RE.search(text) is not None
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _flatten_add_chain(node: ast.AST) -> list[ast.AST]:
    """Flatten an ``a + b + c`` chain into ``[a, b, c]`` (source order).

    Only ``+`` (``ast.Add``) is unrolled; any other node is returned as a single
    leaf. Used to inspect every operand of a string-concatenation query.
    """
    leaves: list[ast.AST] = []
    pending: list[ast.AST] = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, ast.BinOp) and isinstance(current.op, ast.Add):
            # Push right before left so the left operand is popped first.
            pending.append(current.right)
            pending.append(current.left)
        else:
            leaves.append(current)
    return leaves
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _py_concat_is_sql(node: ast.BinOp) -> bool:
    """True if ``"<sql literal>" + <var> [+ ...]`` builds a query with a variable.

    SQL-injection via string concatenation. Fires only when the ``+`` chain
    BOTH (a) contains string literals whose concatenation has real SQL-clause
    structure and (b) has at least one NON-literal operand (a variable / call /
    attribute the caller interpolates in). A constant
    ``"SELECT ... " + "WHERE ..."`` (all literals) is a static query and must
    NOT fire — no injection vector.
    """
    if not isinstance(node.op, ast.Add):
        return False
    pieces = _flatten_add_chain(node)
    texts = [
        piece.value
        for piece in pieces
        if isinstance(piece, ast.Constant) and isinstance(piece.value, str)
    ]
    if len(texts) == len(pieces):
        return False  # constant string concatenation — not dynamic
    joined = "".join(texts)
    if len(joined) < _MIN_SQL_QUERY_LEN:
        return False
    return _SQL_STRUCTURE_RE.search(joined) is not None
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _detect_python_sql_injection(tree: ast.AST) -> list[tuple[int, str]]:
    """Return ``(lineno, description)`` for dynamic SQL passed to DB-call sites.

    AST-level: only fires when a dynamically-built literal is the first
    positional argument of a method call whose name is in ``_DB_CALL_ATTRS``
    AND the literal template has real SQL-clause structure (not just a
    keyword). Covers f-string, ``.format()``, ``%``-format, and ``+`` string
    concatenation with an interpolated variable.
    """
    found: list[tuple[int, str]] = []
    for call in ast.walk(tree):
        if not (isinstance(call, ast.Call) and isinstance(call.func, ast.Attribute)):
            continue
        if call.func.attr not in _DB_CALL_ATTRS or not call.args:
            continue

        query = call.args[0]
        label = None
        if isinstance(query, ast.JoinedStr):
            if _py_fstring_is_sql(query):
                label = "f-string SQL query -- SQL injection risk"
        elif isinstance(query, ast.Call):
            if _py_format_call_is_sql(query):
                label = ".format() SQL query -- SQL injection risk"
        elif isinstance(query, ast.BinOp):
            # %-format is checked before concatenation, as in the rule order.
            if _py_pct_format_is_sql(query):
                label = "%-format SQL query -- use parameterized queries"
            elif _py_concat_is_sql(query):
                label = "concatenated SQL query -- SQL injection risk"

        if label is not None:
            found.append((getattr(call, "lineno", 1) or 1, label))
    return found
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# Callee names for which a ``shell=True`` keyword is treated as a subprocess
# invocation (matched against the bare function or final attribute name).
_SUBPROCESS_SHELL_CALLEES: frozenset[str] = frozenset((
    "run",
    "Popen",
    "call",
    "check_call",
    "check_output",
    "getoutput",
    "getstatusoutput",
    "spawn",
    "system",
))
|
|
313
|
+
|
|
314
|
+
# Map of Python dangerous-call signatures -> description. AST-based: fires only
# on genuine Call nodes, not on pattern strings inside our own source.
# One-element signatures name builtins; longer ones are ``module.attr`` chains.
_PY_DANGEROUS_CALLS: dict[tuple[str, ...], str] = {
    ("eval",): "eval() usage -- potential code injection",
    ("exec",): "exec() usage -- potential code injection",
    ("os", "system"): "os.system() -- prefer subprocess with shell=False",
    ("pickle", "load"): "pickle deserialization -- arbitrary code execution risk",
    ("pickle", "loads"): "pickle deserialization -- arbitrary code execution risk",
}


def _call_attr_chain(node: ast.AST) -> tuple[str, ...] | None:
    """Return the attribute chain for a Call's func (e.g. ``pickle.loads`` -> ('pickle','loads')).

    A bare ``Name`` yields a one-element tuple. For an attribute access the
    trailing attribute names are collected and the root ``Name`` is prepended
    when there is one; when the root is some other expression (a call, a
    subscript, ...) only the attribute names are returned. Returns None when
    ``node`` is neither a ``Name`` nor an ``Attribute``.
    """
    if isinstance(node, ast.Name):
        return (node.id,)
    if isinstance(node, ast.Attribute):
        head = node.value
        parts: list[str] = [node.attr]
        while isinstance(head, ast.Attribute):
            parts.append(head.attr)
            head = head.value
        if isinstance(head, ast.Name):
            parts.append(head.id)
        return tuple(reversed(parts))
    return None


def _matches_dangerous_signature(
    func: ast.AST,
    chain: tuple[str, ...],
    sig: tuple[str, ...],
) -> bool:
    """Decide whether the callee ``func`` (with attribute ``chain``) matches ``sig``.

    Builtin signatures (one element) match only a bare name or an explicit
    ``builtins.<name>`` access; suffix matching would report every method that
    merely shares the name, such as ``model.eval()``. Dotted signatures keep
    suffix matching so ``m.pickle.loads`` still matches ``pickle.loads``.
    """
    if len(sig) == 1:
        if isinstance(func, ast.Name):
            return chain == sig
        return chain == ("builtins",) + sig
    return len(chain) >= len(sig) and chain[-len(sig):] == sig


def _detect_python_dangerous_calls(tree: ast.AST) -> list[tuple[int, str]]:
    """Return ``(lineno, description)`` for AST Call nodes matching dangerous signatures.

    Suppresses FP hits on docstrings, regex pattern constants, and prose that
    mentions ``eval(`` / ``exec(`` / etc. textually but never calls them.

    Three rules are applied to every call:
      * the signatures in ``_PY_DANGEROUS_CALLS`` (at most one hit per call);
      * ``yaml.load`` with no loader, where a loader passed either as the
        ``Loader=`` keyword or as the second positional argument counts;
      * the ``__import__`` builtin called with a non-literal first argument.
    """
    hits: list[tuple[int, str]] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        chain = _call_attr_chain(node.func)
        if chain is None:
            continue
        lineno = getattr(node, "lineno", 1) or 1

        for sig, desc in _PY_DANGEROUS_CALLS.items():
            if _matches_dangerous_signature(node.func, chain, sig):
                hits.append((lineno, desc))
                break

        # yaml.load without a loader (keyword or second positional argument).
        if len(chain) >= 2 and chain[-2:] == ("yaml", "load"):
            has_loader = len(node.args) >= 2 or any(
                kw.arg == "Loader" for kw in node.keywords
            )
            if not has_loader:
                hits.append((lineno, "yaml.load without SafeLoader -- code execution risk"))

        # __import__ with non-literal first arg (the builtin, not a method).
        if isinstance(node.func, ast.Name) and chain == ("__import__",) and node.args:
            first = node.args[0]
            if not (isinstance(first, ast.Constant) and isinstance(first.value, str)):
                hits.append((
                    lineno,
                    "dynamic __import__() with non-literal -- potential injection vector",
                ))
    return hits
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _detect_python_shell_true(tree: ast.AST) -> list[tuple[int, str]]:
    """Return ``(lineno, description)`` for real ``subprocess.*(shell=True, ...)`` calls.

    AST-level: suppresses FP hits on docstrings, regex string constants, and
    prose describing the pattern. Fires only when a Call node has a genuine
    ``shell=True`` keyword AND the callee name (bare function name or final
    attribute) is listed in ``_SUBPROCESS_SHELL_CALLEES``. At most one hit is
    recorded per call.
    """
    flagged: list[tuple[int, str]] = []
    for call in ast.walk(tree):
        if not isinstance(call, ast.Call):
            continue

        target = call.func
        if isinstance(target, ast.Attribute):
            launcher = target.attr
        elif isinstance(target, ast.Name):
            launcher = target.id
        else:
            continue  # subscript / call / lambda callee: no name to compare
        if launcher not in _SUBPROCESS_SHELL_CALLEES:
            continue

        literal_shell_true = any(
            kw.arg == "shell"
            and isinstance(kw.value, ast.Constant)
            and kw.value.value is True
            for kw in call.keywords
        )
        if literal_shell_true:
            flagged.append((
                getattr(call, "lineno", 1) or 1,
                "subprocess with shell=True -- command injection risk",
            ))
    return flagged
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# Upper bound on the number of findings returned for a single file.
_MAX_SECURITY_FINDINGS = 20


def _security_finding(
    file_path: str,
    lineno: int,
    summary: str,
    recommendation: str,
    executor_action: str,
) -> GateFinding:
    """Build the HIGH/REVISE ``security_scan`` finding shared by every detector."""
    return build_finding(
        check_id="security_scan",
        category=GateCategory.CONTRACT,
        title=f"[security_patterns] {file_path}:{lineno}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=summary,
        recommendation=recommendation,
        evidence=(EvidenceReference(kind="probe", path=file_path, detail=summary, ok=False),),
        repair_kind=RepairKind.REPLACE_WITH_FAIL_LOUD.value,
        executor_action=executor_action,
    )


def assess_security_patterns(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 12: Scan for dangerous code patterns in any language.

    For Python, SQL-injection detection is AST-based: a literal is flagged only
    when it is passed to a database-call site (``.execute``, ``.query``, ...)
    AND the template has real SQL-clause structure (``SELECT ... FROM``,
    ``UPDATE ... SET``, ``DELETE FROM``, etc.), not just a keyword appearing in
    prose (error messages, log lines, prompts, HTML).

    Python source that cannot be parsed falls back to the line-based regex
    table instead of being passed without any scan.

    Args:
        file_path: path used for language detection and in finding text.
        content: full text of the file.

    Returns:
        At most ``_MAX_SECURITY_FINDINGS`` findings, AST findings first;
        an empty list for blank content.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    lang = detect_language(file_path)
    findings: list[GateFinding] = []

    # --- AST-based detection for Python (SQL injection + shell=True) --------
    py_tree: ast.AST | None = None
    if lang == "python":
        try:
            py_tree = ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError: ast.parse rejects source containing null bytes.
            py_tree = None

    if py_tree is not None:
        for lineno, desc in _detect_python_sql_injection(py_tree):
            findings.append(_security_finding(
                file_path,
                lineno,
                summary=desc,
                recommendation=f"Use parameterized queries at {file_path}:{lineno}: {desc}",
                executor_action=f"Fix SQL injection at {file_path}:{lineno}",
            ))
        for lineno, desc in _detect_python_shell_true(py_tree):
            findings.append(_security_finding(
                file_path,
                lineno,
                summary=desc,
                recommendation=f"Avoid shell=True at {file_path}:{lineno}: pass argv list instead.",
                executor_action=f"Replace shell=True with argv list at {file_path}:{lineno}",
            ))
        for lineno, desc in _detect_python_dangerous_calls(py_tree):
            findings.append(_security_finding(
                file_path,
                lineno,
                summary=desc,
                recommendation=f"Remove dangerous pattern at {file_path}:{lineno}: {desc}",
                executor_action=f"Fix security issue at {file_path}:{lineno}",
            ))

    # --- Dangerous-pattern detection (regex, language-aware) -----------------
    # Parsed Python is fully covered by the AST detectors above. Unparseable
    # Python and every other language use the line-based regex tables.
    if py_tree is not None:
        dangerous_patterns: list[tuple[str, str]] = []
    elif lang == "python":
        dangerous_patterns = list(_SECURITY_DANGEROUS_PATTERNS)
    elif lang in ("javascript", "typescript"):
        dangerous_patterns = list(_SECURITY_JS_PATTERNS)
    else:
        dangerous_patterns = [
            p for p in _SECURITY_DANGEROUS_PATTERNS
            if "eval" in p[0] or "shell" in p[0]
        ]

    lines = content.splitlines() if dangerous_patterns else []
    for pattern, description in dangerous_patterns:
        if len(findings) >= _MAX_SECURITY_FINDINGS:
            break
        matcher = re.compile(pattern, re.IGNORECASE)
        for i, line in enumerate(lines, 1):
            # Skip whole-line comments (#, //, and block-comment `*` lines).
            if line.lstrip().startswith(("#", "//", "*")):
                continue
            if not matcher.search(line):
                continue
            findings.append(_security_finding(
                file_path,
                i,
                summary=description,
                recommendation=f"Remove dangerous pattern at {file_path}:{i}: {description}",
                executor_action=f"Fix security issue at {file_path}:{i}",
            ))
            if len(findings) >= _MAX_SECURITY_FINDINGS:
                break
    return findings[:_MAX_SECURITY_FINDINGS]
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
# ---------------------------------------------------------------------------
|
|
514
|
+
# Cluster 13: Test Quality
|
|
515
|
+
# ---------------------------------------------------------------------------
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _strip_test_signature_tail(text: str) -> str:
    """Return *text* with the remainder of a ``def`` header removed.

    *text* must start immediately after the opening parenthesis of the
    header. Brackets are balanced so that colons inside the parameter list
    (annotations, lambdas, dict defaults) are ignored; the first colon seen
    at nesting depth zero ends the header. If no such colon is found (for
    example because a string default contains an unbalanced bracket), *text*
    is returned unchanged.
    """
    depth = 1  # we start inside the parameter list's parentheses
    for pos, char in enumerate(text):
        if char in "([{":
            depth += 1
        elif char in ")]}":
            depth -= 1
        elif char == ":" and depth == 0:
            return text[pos + 1:]
    return text


def assess_test_quality(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 13: Detect weak or empty tests in a Python test file.

    The check is regex based. It applies only when ``detect_language`` reports
    ``"python"`` and the file's base name starts with ``test_``; otherwise an
    empty list is returned (not applicable).

    For every ``def test_*`` / ``async def test_*`` header, the text up to the
    next ``def`` at the same indentation is taken as the body and classified:

    * empty body (``pass`` / ``...`` only),
    * only ``assert True`` with no comparison-style assertion,
    * no assertion construct at all.

    Headers and boundary candidates whose line is reported by
    ``collect_string_constant_line_ranges`` are ignored (presumably lines
    inside string literals, e.g. fixtures embedding Python source -- confirm
    against that helper).

    Args:
        file_path: Path of the file being checked; used for applicability
            and in the finding text.
        content: Full source text of the file.

    Returns:
        At most 20 findings, one per weak test function.
    """
    import re

    lang = detect_language(file_path)
    if lang != "python":
        return []  # NOT_APPLICABLE

    if not file_path.replace("\\", "/").split("/")[-1].startswith("test_"):
        return []  # NOT_APPLICABLE

    test_funcs = list(re.finditer(
        r"(?:^|\n)([ \t]*)(?:async[ \t]+)?def (test_\w+)\s*\(",
        content,
    ))
    if not test_funcs:
        return []  # NOT_APPLICABLE

    # F14a: skip test-function matches whose `def test_...` is inside a
    # string literal (test fixtures that embed Python source as a string).
    string_literal_lines = collect_string_constant_line_ranges(content)

    findings: list[GateFinding] = []
    for match in test_funcs:
        # 1-based line of the header. match.start() may sit on the preceding
        # newline, so count newlines up to the function name, which is always
        # on the header line itself.
        def_line_no = content.count("\n", 0, match.start(2)) + 1
        if def_line_no in string_literal_lines:
            continue

        indent = match.group(1)
        func_name = match.group(2)
        body_start = match.end()

        # The body ends at the next (async) def with the same indentation.
        # indent only contains spaces/tabs, so it is safe inside the pattern.
        next_def = re.compile(rf"\n{indent}(?:async[ \t]+)?def \w+\s*\(")

        # F14a: walk next-function candidates until we find one that is NOT
        # inside a string literal (else the body would be cut short at a fake
        # `def foo` inside a triple-quoted fixture, dropping real asserts).
        body_end = len(content)
        search_from = body_start
        while True:
            nxt = next_def.search(content, search_from)
            if nxt is None:
                break
            # nxt starts on the newline before the candidate, so include that
            # newline when computing the candidate's own line number.
            cand_line = content.count("\n", 0, nxt.start() + 1) + 1
            if cand_line not in string_literal_lines:
                body_end = nxt.start()
                break
            search_from = nxt.end()

        # The slice begins right after "(", so drop the rest of the header;
        # otherwise "):" would make an empty body look non-empty.
        body = _strip_test_signature_tail(content[body_start:body_end])

        body_stripped = body.strip()
        detail = None
        if not body_stripped or body_stripped == "pass" or body_stripped.startswith("..."):
            detail = "Empty test body (pass/ellipsis only)"
        elif re.search(r"\bassert\s+True\b", body) and not re.search(r"\bassert\s+\w+\s*(==|!=|>|<|in|not)", body):
            detail = "Only assert True -- meaningless assertion"
        elif not re.search(r"\bassert\b|\bAssert\w+\(|\.assert_\w+\(|pytest\.raises", body):
            detail = "No assertions found in test body"

        if detail:
            findings.append(build_finding(
                check_id="test_quality_scan",
                category=GateCategory.TESTING,
                title=f"[test_quality] {file_path}:{func_name}",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=detail,
                recommendation=f"Add meaningful assertions to {func_name}.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.ADD_TEST.value,
                executor_action=f"Add assertions to {func_name} in {file_path}",
            ))
        if len(findings) >= 20:
            break
    return findings
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
# ---------------------------------------------------------------------------
|
|
607
|
+
# Cluster 14: Import Cycles
|
|
608
|
+
# ---------------------------------------------------------------------------
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def assess_import_cycles(
    module_imports: dict[str, list[str]],
) -> list[GateFinding]:
    """Cluster 14: Detect circular import dependencies.

    Runs a depth-first search over the import graph. Whenever an edge leads
    back to a module on the current DFS path, the cycle is recorded. Each
    cycle is rotated to start at its lexicographically smallest module and
    closed by repeating that module at the end (``("a", "b", "a")``), so the
    same cycle found from different entry points is reported once.

    Args:
        module_imports: Mapping of module name to the modules it imports,
            e.g. ``{"module_a": ["module_b"], "module_b": ["module_a"]}``.
            Imports of modules that are not keys of the mapping are ignored.

    Returns:
        One finding per distinct cycle, for at most the first 20 cycles.
        Empty when *module_imports* is empty or no cycle is found.
    """
    if not module_imports:
        return []  # NOT_APPLICABLE

    # DFS cycle detection
    cycles: list[tuple[str, ...]] = []
    seen_cycles: set[tuple[str, ...]] = set()  # O(1) de-duplication
    visited: set[str] = set()
    path: list[str] = []
    path_set: set[str] = set()

    def _record_cycle(node: str) -> None:
        # Members of the cycle in traversal order, each exactly once.
        members = path[path.index(node):]
        # Rotate so the smallest name comes first, then close the loop.
        pivot = members.index(min(members))
        rotated = members[pivot:] + members[:pivot]
        normalized = tuple(rotated + rotated[:1])
        if normalized not in seen_cycles:
            seen_cycles.add(normalized)
            cycles.append(normalized)

    def _dfs(node: str) -> None:
        if node in path_set:
            _record_cycle(node)
            return
        if node in visited:
            return
        visited.add(node)
        path.append(node)
        path_set.add(node)
        for dep in module_imports.get(node, []):
            # Only follow edges to modules that are part of the graph.
            if dep in module_imports:
                _dfs(dep)
        path.pop()
        path_set.discard(node)

    for mod in module_imports:
        if mod not in visited:
            _dfs(mod)

    findings: list[GateFinding] = []
    for cycle in cycles[:20]:
        chain = " -> ".join(cycle)
        findings.append(build_finding(
            check_id="import_cycle_scan",
            category=GateCategory.CONTRACT,
            title=f"[import_cycles] {chain[:80]}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=f"Circular dependency: {chain}",
            recommendation="Break the import cycle by extracting shared code to a third module.",
            evidence=(EvidenceReference(kind="probe", detail=f"Circular dependency: {chain}", ok=False),),
            repair_kind=RepairKind.EXTRACT_SHARED.value,
            executor_action=f"Break import cycle: {chain}",
        ))
    return findings
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
# ---------------------------------------------------------------------------
|
|
670
|
+
# Cluster 15: Roundtrip Consistency
|
|
671
|
+
# ---------------------------------------------------------------------------
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def assess_roundtrip_consistency(
    class_name: str,
    original_dict: dict[str, object],
    roundtripped_dict: dict[str, object],
) -> list[GateFinding]:
    """Cluster 15: Verify serialize/deserialize roundtrip preserves all fields.

    Compares the two dictionaries key by key (in sorted key order) and
    reports every key that appeared, disappeared, or changed value.

    Args:
        class_name: Name used to label findings (``<class_name>.<key>``).
        original_dict: Field values before the roundtrip.
        roundtripped_dict: Field values after the roundtrip.

    Returns:
        One finding per differing key; empty when *original_dict* is empty
        (not applicable) or when both dictionaries agree.
    """
    if not original_dict:
        return []  # NOT_APPLICABLE

    def _describe_mismatch(field):
        # Returns a human-readable problem, or None when the field survived.
        if field not in original_dict:
            return "Key appeared after roundtrip (not in original)"
        if field not in roundtripped_dict:
            return "Key lost during roundtrip"
        before = original_dict[field]
        after = roundtripped_dict[field]
        if before != after:
            return f"Value changed: {str(before)[:50]} -> {str(after)[:50]}"
        return None

    every_field = sorted(set(original_dict) | set(roundtripped_dict))
    mismatches = [(field, _describe_mismatch(field)) for field in every_field]

    return [
        build_finding(
            check_id="roundtrip_check",
            category=GateCategory.DRIFT,
            title=f"[roundtrip_consistency] {class_name}.{field}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=problem,
            recommendation=f"Fix roundtrip serialization for {class_name}.{field}.",
            evidence=(EvidenceReference(kind="probe", detail=problem, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix roundtrip for {class_name}.{field}",
        )
        for field, problem in mismatches
        if problem is not None
    ]
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
# ---------------------------------------------------------------------------
|
|
713
|
+
# Cluster 16: Shared Mutable State
|
|
714
|
+
# ---------------------------------------------------------------------------
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def assess_shared_mutable_state(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 16: Detect module-level mutable state without synchronization.

    A finding is produced for every UPPER_CASE name that is assigned a
    dict/list/set-like value at column zero, is touched by an indexing or
    mutating-method expression somewhere in the file, and lives in a file
    that references ``threading`` or constructs a ``Thread``. The check is
    purely textual; it does not look for locks around the mutation.

    Args:
        file_path: Path of the file, used in the finding text.
        content: Full source text of the file.

    Returns:
        One finding per matching assignment; empty for blank content, files
        without such assignments, or files without threading usage.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    threaded = re.search(
        r"\bimport\s+threading\b|\bfrom\s+threading\b|\bThread\s*\(",
        content,
    ) is not None

    mutable_pattern = re.compile(
        r"^([A-Z_][A-Z_0-9]*)\s*(?::.*?)?\s*=\s*("
        r"\{[^}]*\}|"
        r"\[[^\]]*\]|"
        r"set\s*\(|"
        r"dict\s*\(|"
        r"list\s*\(|"
        r"defaultdict\s*\("
        r")",
        re.MULTILINE,
    )

    declarations = list(mutable_pattern.finditer(content))
    if not declarations:
        return []  # PASS — no module-level mutables

    if not threaded:
        # Findings require threading usage, so nothing can be reported.
        return []

    # Suffixes appended to the escaped variable name: item access first,
    # then the in-place mutating methods.
    usage_suffixes = (
        r"\s*\[",
        r"\.append\b",
        r"\.add\b",
        r"\.update\b",
        r"\.pop\b",
        r"\.remove\b",
        r"\.extend\b",
        r"\.clear\b",
        r"\.discard\b",
    )

    results: list[GateFinding] = []
    for decl in declarations:
        var_name = decl.group(1)
        line_num = content.count("\n", 0, decl.start()) + 1
        escaped = re.escape(var_name)

        touched = any(
            re.search(rf"\b{escaped}{suffix}", content)
            for suffix in usage_suffixes
        )
        if not touched:
            continue

        message = f"Module-level mutable '{var_name}' mutated in threaded context without lock"
        results.append(build_finding(
            check_id="mutable_state_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[shared_mutable_state] {file_path}:{line_num}:{var_name}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=message,
            recommendation="Add a threading.Lock() to protect all mutations of this variable.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=message, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Add lock for '{var_name}' in {file_path}",
        ))
    return results
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
# ---------------------------------------------------------------------------
|
|
780
|
+
# Cluster 17: Dependency Vulnerabilities
|
|
781
|
+
# ---------------------------------------------------------------------------
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def _flatten_pip_audit_entries(entries: list) -> list[dict]:
    """Flatten pip audit JSON entries into one record per vulnerability.

    Three entry shapes are handled:

    * pip-audit dependency entries (``{"name", "version", "vulns": [...]}``):
      one record per item in ``vulns``, carrying the dependency's name and
      version; dependencies with an empty ``vulns`` list yield nothing.
    * pip-audit skipped dependencies (``{"name", "skip_reason"}``): ignored,
      since they were not audited.
    * any other dict: treated as an already-flat vulnerability record.

    Non-dict entries are ignored.
    """
    records: list[dict] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        if "vulns" in entry:
            nested = entry["vulns"]
            if not isinstance(nested, list):
                continue
            for vuln in nested:
                if isinstance(vuln, dict):
                    records.append({
                        **vuln,
                        "name": entry.get("name"),
                        "version": entry.get("version"),
                    })
        elif "skip_reason" not in entry:
            records.append(entry)
    return records


def assess_dependency_vulnerabilities(
    audit_output: str,
    package_manager: str = "pip",
) -> list[GateFinding]:
    """Cluster 17: Detect known CVEs in project dependencies.

    Args:
        audit_output: JSON text produced by a dependency audit tool.
        package_manager: ``"pip"`` or ``"npm"``; selects how the JSON is
            interpreted. Any other value yields no findings.

    Returns:
        * ``[]`` when *audit_output* is blank (not applicable).
        * A single insufficient-evidence finding when the output is not
          valid JSON or its top-level structure is not the expected one.
        * Otherwise one HIGH finding per reported vulnerability.

    For ``"pip"`` the payload may be a list of records, a dict with a
    ``"vulnerabilities"`` list, or a dict with a ``"dependencies"`` list
    (pip-audit); nested ``vulns`` are flattened per vulnerability. For
    ``"npm"`` the payload must be a dict whose ``"vulnerabilities"`` value
    maps package names to info dicts.
    """
    import json as _json

    if not audit_output.strip():
        return []  # NOT_APPLICABLE

    def _insufficient(explanation: str) -> list[GateFinding]:
        return [_insufficient_evidence_finding(
            check_id="dependency_cve_scan",
            category=GateCategory.CONTRACT,
            cluster="dependency_vulnerabilities",
            explanation=explanation,
        )]

    try:
        data = _json.loads(audit_output)
    except _json.JSONDecodeError:
        return _insufficient(f"Could not parse {package_manager} audit output as JSON")

    unexpected = f"Unexpected {package_manager} audit JSON structure"
    findings: list[GateFinding] = []

    if package_manager == "pip":
        if isinstance(data, list):
            entries = data
        elif isinstance(data, dict):
            entries = data.get("vulnerabilities")
            if entries is None:
                # pip-audit wraps its per-dependency results in "dependencies".
                entries = data.get("dependencies", [])
        else:
            return _insufficient(unexpected)
        if not isinstance(entries, list):
            return _insufficient(unexpected)

        for vuln in _flatten_pip_audit_entries(entries):
            name = vuln.get("name") or vuln.get("package_name") or "unknown"
            vuln_id = vuln.get("id") or vuln.get("vulnerability_id") or (vuln.get("aliases") or [""])[0] or "CVE-unknown"
            version = vuln.get("version") or vuln.get("installed_version") or "?"
            fix = vuln.get("fix_versions") or vuln.get("fixed_in") or []
            summary = f"{vuln_id}: {name}=={version} (fix: {fix})"
            findings.append(build_finding(
                check_id="dependency_cve_scan",
                category=GateCategory.CONTRACT,
                title=f"[dependency_vulnerabilities] {name}=={version}",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=summary,
                recommendation=f"Upgrade {name} to a fixed version: {fix}",
                evidence=(EvidenceReference(kind="probe", detail=summary, ok=False),),
                repair_kind=RepairKind.FIX_CONTRACT.value,
                executor_action=f"Upgrade {name} to fix {vuln_id}",
            ))
    elif package_manager == "npm":
        vulns = data.get("vulnerabilities", {}) if isinstance(data, dict) else None
        if not isinstance(vulns, dict):
            return _insufficient(unexpected)

        for pkg_name, info in vulns.items():
            # Tolerate malformed per-package info instead of crashing.
            severity = info.get("severity", "unknown") if isinstance(info, dict) else "unknown"
            summary = f"{pkg_name}: severity={severity}"
            findings.append(build_finding(
                check_id="dependency_cve_scan",
                category=GateCategory.CONTRACT,
                title=f"[dependency_vulnerabilities] {pkg_name}",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=summary,
                recommendation=f"Upgrade or patch {pkg_name}.",
                evidence=(EvidenceReference(kind="probe", detail=summary, ok=False),),
                repair_kind=RepairKind.FIX_CONTRACT.value,
                executor_action=f"Upgrade {pkg_name}",
            ))

    return findings
|