vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
"""Data handling, time, duplication, and dependency quality. Clusters 44-50.
|
|
2
|
+
|
|
3
|
+
Clusters:
|
|
4
|
+
44 - Naive Timezone Usage
|
|
5
|
+
45 - Intra-File Near-Duplicate Code
|
|
6
|
+
46 - Missing Null/None Check at API Boundary
|
|
7
|
+
47 - String Concatenation for Paths
|
|
8
|
+
48 - Log Without Error Context
|
|
9
|
+
49 - Secrets in Test Files
|
|
10
|
+
50 - Unpinned Dependencies
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from .core import detect_language
|
|
15
|
+
from ...gate_models import (
|
|
16
|
+
EvidenceReference,
|
|
17
|
+
GateCategory,
|
|
18
|
+
GateFinding,
|
|
19
|
+
GateImpact,
|
|
20
|
+
GateSeverity,
|
|
21
|
+
RepairKind,
|
|
22
|
+
)
|
|
23
|
+
from ..common import build_finding
|
|
24
|
+
import logging
|
|
25
|
+
_log = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Cluster 44: Naive Timezone Usage
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def assess_naive_timezone(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 44: flag date/time calls that ignore timezone or locale.

    The scan is purely line-based. Only files that ``detect_language``
    reports as Python, JavaScript or TypeScript are inspected, and files
    whose basename starts with ``test_`` or ``conftest`` are skipped.

    Args:
        file_path: Path of the file being assessed; used for language
            detection and echoed into every finding.
        content: Full text of the file.

    Returns:
        One finding per matched pattern per line. Scanning stops after the
        first line that brings the total to 10 or more findings.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # Basename extraction tolerant of both Windows and POSIX separators.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []

    def report(line_no, detail, severity, impact, recommendation, action):
        # Every finding of this cluster shares check id, category, title
        # shape, evidence shape and repair kind; only the text varies.
        findings.append(build_finding(
            check_id="timezone_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[naive_timezone] {file_path}:{line_no}",
            severity=severity,
            impact=impact,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=action,
        ))

    is_python = lang == "python"
    comment_marker = "#" if is_python else "//"

    for line_no, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith(comment_marker):
            continue  # whole-line comment

        if is_python:
            if re.search(r'datetime\.now\s*\(\s*\)', text):
                report(
                    line_no,
                    f"datetime.now() without timezone (line {line_no}) -- use datetime.now(tz=timezone.utc)",
                    GateSeverity.MEDIUM,
                    GateImpact.REVISE,
                    "Use `datetime.now(tz=timezone.utc)` for timezone-aware datetimes.",
                    f"Fix naive datetime at {file_path}:{line_no}",
                )
            if re.search(r'datetime\.utcnow\s*\(', text):
                report(
                    line_no,
                    f"datetime.utcnow() is deprecated (line {line_no}) -- use datetime.now(tz=timezone.utc)",
                    GateSeverity.MEDIUM,
                    GateImpact.REVISE,
                    "Replace `datetime.utcnow()` with `datetime.now(tz=timezone.utc)`.",
                    f"Fix deprecated utcnow() at {file_path}:{line_no}",
                )
            if re.search(r'time\.localtime\s*\(\s*\)', text):
                report(
                    line_no,
                    f"time.localtime() without timezone (line {line_no}) -- use time.gmtime() or datetime",
                    GateSeverity.LOW,
                    GateImpact.WARN,
                    "Use `time.gmtime()` or `datetime.now(tz=timezone.utc)` instead.",
                    f"Fix localtime() at {file_path}:{line_no}",
                )
        elif re.search(r'\.toLocaleDateString\s*\(\s*\)', text):
            report(
                line_no,
                f"toLocaleDateString() without locale (line {line_no}) -- specify locale explicitly",
                GateSeverity.LOW,
                GateImpact.WARN,
                "Pass explicit locale: `.toLocaleDateString('en-US', { timeZone: 'UTC' })`.",
                f"Fix toLocaleDateString() at {file_path}:{line_no}",
            )

        # The cap is evaluated once per line, after all patterns have run.
        if len(findings) >= 10:
            break

    return findings
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ---------------------------------------------------------------------------
|
|
129
|
+
# Cluster 45: Intra-File Near-Duplicate Code
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
import re as _re
|
|
134
|
+
|
|
135
|
+
# FP-round2-D (2026-06-28): signature / typing scaffolding line shapes that must
|
|
136
|
+
# NOT count as meaningful duplicate lines. These repeat by language requirement
|
|
137
|
+
# (typing overloads) or API symmetry and are not refactorable logic:
|
|
138
|
+
# * decorator lines: ``@t.overload`` / ``@property`` / ``@staticmethod``
|
|
139
|
+
# * def openers / closers: ``def f(`` / ``async def f(`` / ``): ...`` / ``) -> X:``
|
|
140
|
+
# * bare parameter declarations inside a multi-line signature:
|
|
141
|
+
# ``name`` | ``name,`` | ``name=default,`` | ``name: type,`` | ``*args,``
|
|
142
|
+
# where the value is a simple literal/identifier (NOT a function call, so a
|
|
143
|
+
# real statement like ``record = build_record(...)`` is never skipped).
|
|
144
|
+
# * lone ellipsis stub bodies: ``...``
|
|
145
|
+
_SCAFFOLD_PARAM_RE = _re.compile(
    r"^\*{0,2}[A-Za-z_]\w*"          # identifier, optionally starred (*args / **kwargs)
    r"(?:\s*:\s*[^=(]+?)?"           # optional annotation; may not contain '=' or '('
    r"(?:\s*=\s*[^(]+?)?"            # optional default value; may not contain '('
    r",?$"                           # optional trailing comma, then end of line
)


def _is_signature_scaffolding(s: str) -> bool:
    """Return True when the stripped line *s* is signature/typing scaffolding.

    Scaffolding covers: lone ``...`` stub bodies and lines ending in
    ``): ...`` or ``) -> ...``; decorator lines; ``def`` / ``async def``
    headers; signature closers (``)``, ``):``, ``->``, ``) ->`` and any line
    that starts with ``)`` and ends with ``:``); and bare parameter
    declarations as recognised by ``_SCAFFOLD_PARAM_RE``.
    """
    # Stub bodies, including one-line ``def f(): ...`` style endings.
    if s == "..." or s.endswith(("): ...", ") -> ...")):
        return True
    # Decorators and def headers; a def header counts as scaffolding even
    # when a body follows on the same line.
    if s.startswith(("@", "def ", "async def ")):
        return True
    # Signature closers, with or without a return annotation.
    if s in (")", "):", "->", ") ->"):
        return True
    if s[:1] == ")" and s[-1:] == ":":
        return True
    # Bare parameter declaration inside a multi-line signature; anything
    # containing a call paren is never treated as scaffolding.
    return "(" not in s and _SCAFFOLD_PARAM_RE.match(s) is not None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def assess_near_duplicate_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 45: Detect near-duplicate code blocks within the same file.

    A single duplicated REGION of N lines spans N-BLOCK_SIZE+1 overlapping
    sliding windows. Emitting one finding per window inflated the count (a
    4-statement block reported once per line: "lines 118 and 201", "119 and
    202", ...). We collect every duplicate window-pair, then MERGE contiguous
    pairs into ONE finding per contiguous block — mirroring the
    region-grouping ``_merge_starts`` used by ``duplication.text_block`` — so a
    block reports as "lines 118-121 <-> 201-204" exactly once. Genuinely
    separate duplicate blocks still each report once (merge, not cap).

    Merging is done per *diagonal* (constant ``dup_idx - orig_idx``), so a
    block that occurs three or more times still merges into one region per
    (first occurrence, later occurrence) pairing instead of fragmenting into
    single-window regions that fall below the reporting floor.

    Args:
        file_path: Path of the file being assessed; used for language
            detection and echoed into every finding.
        content: Full text of the file.

    Returns:
        At most 10 findings, ordered by the position of the original block
        and then of its duplicate. Empty for blank content, for data/markup
        languages, for files shorter than 10 lines, or when fewer than
        ``2 * BLOCK_SIZE`` meaningful lines remain after normalization.
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang in ("json", "yaml", "toml", "markdown", "restructuredtext", "sql"):
        return []

    lines = content.splitlines()
    if len(lines) < 10:
        return []

    BLOCK_SIZE = 4
    # FP-round2-D (2026-06-28): minimum number of MEANINGFUL (post-normalization,
    # non-scaffolding) lines a duplicated region must span to be reported.
    # ``_is_signature_scaffolding`` is the primary noise filter (overload stubs
    # and parameter-list mirrors are stripped before windows are formed); this
    # floor is a secondary filter that drops trivial 3-4 line mirrors while
    # keeping genuine multi-statement duplicates (>= 5 meaningful lines).
    MIN_DUP_REGION_LINES = 5

    # (whitespace-collapsed text, 1-based source line) for every meaningful line.
    normalized: list[tuple[str, int]] = []
    for i, line in enumerate(lines, 1):
        s = line.strip()
        if not s or s.startswith(("#", "//", "*")):
            continue
        if s in ("}", "{", "pass", "return", "break", "continue", "else:", "try:", "finally:"):
            continue
        # FP-round2-D: skip signature / typing scaffolding so overload stubs and
        # parameter-list mirrors do not accumulate "meaningful" duplicate lines.
        if _is_signature_scaffolding(s):
            continue
        normalized.append((" ".join(s.split()), i))

    if len(normalized) < BLOCK_SIZE * 2:
        return []

    # Pass 1: pair every repeated window with the FIRST window that had the
    # same fingerprint. Indices are positions in ``normalized``.
    seen: dict[str, int] = {}  # fingerprint -> normalized index of first window
    raw_pairs: list[tuple[int, int]] = []  # (orig_norm_idx, dup_norm_idx)
    for idx in range(len(normalized) - BLOCK_SIZE + 1):
        fp = "\n".join(normalized[idx + k][0] for k in range(BLOCK_SIZE))
        orig_idx = seen.get(fp)
        if orig_idx is None:
            seen[fp] = idx
        elif idx - orig_idx >= BLOCK_SIZE:
            # Compare positions in ``normalized``, not source line numbers:
            # blank/comment gaps can push two overlapping windows far apart
            # in line numbers, which would report a block as a duplicate of
            # itself.
            raw_pairs.append((orig_idx, idx))

    if not raw_pairs:
        return []

    # Pass 2: merge window-pairs into block-level regions. Pairs belonging to
    # one duplicated block share the same offset (dup - orig) and have
    # consecutive original indices, so sorting by (offset, orig) places each
    # block's windows next to each other even when the same code occurs more
    # than twice.
    raw_pairs.sort(key=lambda pair: (pair[1] - pair[0], pair[0]))

    # Each run: (first_orig_idx, last_orig_idx, first_dup_idx, last_dup_idx),
    # all being window START indices into ``normalized``.
    runs: list[tuple[int, int, int, int]] = []
    start_o, start_d = raw_pairs[0]
    prev_o, prev_d = start_o, start_d
    for o, d in raw_pairs[1:]:
        if o == prev_o + 1 and d == prev_d + 1:
            prev_o, prev_d = o, d  # same sliding region — extend
            continue
        runs.append((start_o, prev_o, start_d, prev_d))
        start_o, start_d = o, d
        prev_o, prev_d = o, d
    runs.append((start_o, prev_o, start_d, prev_d))

    # Report in source order of the original block, then of the duplicate.
    runs.sort(key=lambda run: (run[0], run[2]))

    findings: list[GateFinding] = []
    for first_o, last_o, first_d, last_d in runs:
        # The region covers normalized entries first_o .. last_o+BLOCK_SIZE-1,
        # i.e. exactly this many meaningful lines (blank/comment gaps in the
        # raw line span are not counted).
        meaningful_in_region = last_o + BLOCK_SIZE - first_o
        if meaningful_in_region < MIN_DUP_REGION_LINES:
            continue

        orig_start = normalized[first_o][1]
        orig_end = normalized[last_o + BLOCK_SIZE - 1][1]
        dup_start = normalized[first_d][1]
        dup_end = normalized[last_d + BLOCK_SIZE - 1][1]
        n_lines = orig_end - orig_start + 1  # raw source span, gaps included

        detail = (
            f"Near-duplicate block at lines {orig_start}-{orig_end} <-> "
            f"{dup_start}-{dup_end} ({n_lines} lines)"
        )
        findings.append(build_finding(
            check_id="duplicate_scan",
            category=GateCategory.DRIFT,
            title=f"[near_duplicate_code] {file_path}:{dup_start}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Extract the duplicate block into a shared function.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REMOVE_DUPLICATE.value,
            executor_action=f"Deduplicate code block at {file_path}:{dup_start}",
        ))
        if len(findings) >= 10:
            break

    return findings
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# ---------------------------------------------------------------------------
|
|
319
|
+
# Cluster 46: Missing Null/None Check at API Boundary
|
|
320
|
+
# ---------------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def assess_missing_null_check(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 46: flag unchecked key/field access on request or JSON payloads.

    The scan is purely line-based. Only files that ``detect_language``
    reports as Python, JavaScript or TypeScript are inspected, and files
    whose basename starts with ``test_`` or ``conftest`` are skipped.

    Args:
        file_path: Path of the file being assessed; used for language
            detection and echoed into every finding.
        content: Full text of the file.

    Returns:
        One finding per matched pattern per line. Scanning stops after the
        first line that brings the total to 10 or more findings.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # Basename extraction tolerant of both Windows and POSIX separators.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []

    def report(line_no, detail, recommendation, action):
        # All findings of this cluster share severity, impact and repair kind.
        findings.append(build_finding(
            check_id="null_check_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[missing_null_check] {file_path}:{line_no}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.ADD_BOUNDARY_CHECK.value,
            executor_action=action,
        ))

    is_python = lang == "python"
    comment_marker = "#" if is_python else "//"

    for line_no, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith(comment_marker):
            continue  # whole-line comment

        if is_python:
            if re.search(r'request\.json\s*\[', text):
                report(
                    line_no,
                    f"request.json[key] without .get() -- KeyError if missing (line {line_no})",
                    "Use `request.json.get('key')` with a default value.",
                    f"Fix missing null check at {file_path}:{line_no}",
                )
            if re.search(r'request\.form\s*\[', text):
                report(
                    line_no,
                    f"request.form[key] without .get() -- KeyError if missing (line {line_no})",
                    "Use `request.form.get('key')` with a default value.",
                    f"Fix missing null check at {file_path}:{line_no}",
                )
            if re.search(r'json\.loads\s*\([^)]+\)\s*\[', text):
                report(
                    line_no,
                    f"json.loads()[key] -- chain of failure points (line {line_no})",
                    "Assign json.loads() to a variable and use .get() for key access.",
                    f"Fix chained failure points at {file_path}:{line_no}",
                )
        else:
            # Field access on req.body is tolerated when the line uses
            # optional chaining or is itself an ``if (...req.body`` guard.
            unguarded_body_access = (
                re.search(r'req\.body\.\w+', text)
                and "?." not in text
                and not re.search(r'if\s*\(.*req\.body', text)
            )
            if unguarded_body_access:
                report(
                    line_no,
                    f"req.body.field without null check (line {line_no}) -- use ?. or validate first",
                    "Use optional chaining (`?.`) or validate `req.body` before accessing fields.",
                    f"Fix missing null check at {file_path}:{line_no}",
                )
            if re.search(r'JSON\.parse\s*\([^)]+\)\.\w+', text):
                report(
                    line_no,
                    f"JSON.parse().field -- chain of failure points (line {line_no})",
                    "Assign JSON.parse() to a variable and use optional chaining for field access.",
                    f"Fix chained failure points at {file_path}:{line_no}",
                )

        # The cap is evaluated once per line, after all patterns have run.
        if len(findings) >= 10:
            break

    return findings
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ---------------------------------------------------------------------------
|
|
434
|
+
# Cluster 47: String Concatenation for Paths
|
|
435
|
+
# ---------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def assess_path_concatenation(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 47: flag file paths assembled by string concatenation.

    The scan is purely line-based. Only files that ``detect_language``
    reports as Python, JavaScript or TypeScript are inspected, and files
    whose basename starts with ``test_`` or ``conftest`` are skipped.
    Comment lines and lines containing an ``http://`` / ``https://`` URL
    are ignored.

    Args:
        file_path: Path of the file being assessed; used for language
            detection and echoed into every finding.
        content: Full text of the file.

    Returns:
        One finding per matched pattern per line. Scanning stops after the
        first line that brings the total to 10 or more findings.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # Basename extraction tolerant of both Windows and POSIX separators.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    # ``<word> + "/"`` or ``<word> + "\"`` style concatenation.
    concat_re = re.compile(r'\w+\s*\+\s*["\'][/\\]["\']')
    # f-string joining two interpolations with a slash: f"...{a}/{b}".
    fstring_re = re.compile(r'f["\'][^"\']*\{[^}]+\}/\{[^}]+\}')
    # Concatenation is only reported when the line mentions one of these.
    ctx_words = ("path", "dir", "file", "folder", "name", "root", "base")

    findings: list[GateFinding] = []

    def report(line_no, detail, recommendation, action):
        # All findings of this cluster share severity, impact and repair kind.
        findings.append(build_finding(
            check_id="path_concat_scan",
            category=GateCategory.CONTRACT,
            title=f"[path_concatenation] {file_path}:{line_no}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=action,
        ))

    for line_no, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith(("#", "//")):
            continue  # whole-line comment
        if "http://" in text or "https://" in text:
            continue  # URL building, not a filesystem path

        if concat_re.search(text):
            lowered = text.lower()
            if any(word in lowered for word in ctx_words):
                report(
                    line_no,
                    f"String concat for path building (line {line_no}) -- use os.path.join / Path",
                    "Use `os.path.join()` or `pathlib.Path` instead of string concatenation.",
                    f"Fix path concatenation at {file_path}:{line_no}",
                )

        if lang == "python" and fstring_re.search(text):
            report(
                line_no,
                f"f-string path building (line {line_no}) -- use os.path.join / Path",
                "Use `os.path.join()` or `pathlib.Path` instead of f-string path building.",
                f"Fix f-string path building at {file_path}:{line_no}",
            )

        # The cap is evaluated once per line, after all patterns have run.
        if len(findings) >= 10:
            break

    return findings
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# ---------------------------------------------------------------------------
|
|
501
|
+
# Cluster 48: Log Without Error Context
|
|
502
|
+
# ---------------------------------------------------------------------------
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def assess_log_without_context(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 48: Detect error logging without exception context.

    Performs a line-based scan of Python ``except`` blocks and
    JavaScript/TypeScript ``catch`` blocks and reports error-level log calls
    that neither mention the caught exception nor attach a traceback.

    Args:
        file_path: Path of the scanned file. Used for language detection, for
            skipping ``test_*`` / ``conftest*`` files, and in finding titles.
        content: Full text of the file.

    Returns:
        At most 10 findings with ``check_id="log_context_scan"``. An empty
        list is returned for blank content, unsupported languages and test
        files.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    # NOTE: "java" passes this gate, but no Java scan is implemented below,
    # so Java files always produce an empty list.
    if lang not in ("python", "javascript", "typescript", "java"):
        return []

    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    max_findings = 10
    findings: list[GateFinding] = []
    lines = content.splitlines()

    if lang == "python":
        # Accept every header form: bare ``except:``, a single type, a dotted
        # type (``json.JSONDecodeError``), a tuple of types and ``except*``.
        except_header = re.compile(r'^except\b([^:]*):')
        # The bound name, if any, is the trailing ``as <name>`` of the header.
        bound_name = re.compile(r'\bas\s+(\w+)\s*$')
        # NOTE(review): ``logger?`` matches "logge"/"logger", not a bare
        # ``log.`` receiver -- kept as-is to preserve what gets scanned.
        log_call = re.compile(r'(?:logger?|logging)\.\w*(error|exception|critical)\s*\(')

        in_except = False
        except_var = None
        except_indent = 0

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            indent = len(line) - len(line.lstrip())

            header = except_header.match(stripped)
            if header:
                bound = bound_name.search(header.group(1))
                in_except = True
                except_var = bound.group(1) if bound else None
                except_indent = indent
                continue

            if not in_except:
                continue

            # A non-blank line at or left of the ``except`` keyword closes the block.
            if stripped and indent <= except_indent:
                in_except = False
                except_var = None
                continue

            if not log_call.search(stripped):
                continue

            has_context = (
                "exc_info" in stripped
                or "traceback" in stripped
                or ".exception(" in stripped
                or bool(
                    except_var
                    and re.search(rf'\b{re.escape(except_var)}\b', stripped)
                )
            )
            if has_context:
                continue

            detail = f"logger.error() in except block without exception context (line {i})"
            findings.append(build_finding(
                check_id="log_context_scan",
                category=GateCategory.REPORTING,
                title=f"[log_without_context] {file_path}:{i}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation="Use `logger.exception()` or pass `exc_info=True` to include traceback.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.ADD_PROOF.value,
                executor_action=f"Add exception context to log at {file_path}:{i}",
            ))
            if len(findings) >= max_findings:
                break

    elif lang in ("javascript", "typescript"):
        # ``search`` (not ``match``) so the usual ``} catch (err) {`` form is
        # recognised; the look-behind rejects promise ``.catch(handler)`` calls
        # and identifiers that merely end in "catch". An optional TypeScript
        # annotation (``catch (e: unknown)``) is tolerated.
        catch_header = re.compile(r'(?<![.\w])catch\s*\(\s*(\w+)\s*(?::[^)]*)?\)')

        in_catch = False
        catch_var = None
        catch_indent = 0

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            indent = len(line) - len(line.lstrip())

            header = catch_header.search(stripped)
            if header:
                in_catch = True
                catch_var = header.group(1)
                catch_indent = indent
                continue

            if not in_catch:
                continue

            # Lone braces at the header's indent belong to the catch block
            # itself; any other line at that depth ends it.
            if indent <= catch_indent and stripped and stripped not in ("}", "{"):
                in_catch = False
                catch_var = None
                continue

            if not re.search(r'console\.error\s*\(', stripped):
                continue

            # Whole-word match: a substring test would "find" a variable named
            # ``e`` inside ``console.error`` itself and never report anything.
            if catch_var and not re.search(rf'\b{re.escape(catch_var)}\b', stripped):
                detail = f"console.error() in catch block without error object (line {i})"
                findings.append(build_finding(
                    check_id="log_context_scan",
                    category=GateCategory.REPORTING,
                    title=f"[log_without_context] {file_path}:{i}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=detail,
                    recommendation=f"Pass the error object to console.error: `console.error('message', {catch_var})`.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.ADD_PROOF.value,
                    executor_action=f"Add error context to log at {file_path}:{i}",
                ))
                if len(findings) >= max_findings:
                    break

    return findings
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
# ---------------------------------------------------------------------------
|
|
617
|
+
# Cluster 49: Secrets in Test Files
|
|
618
|
+
# ---------------------------------------------------------------------------
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def assess_test_secrets(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 49: Detect real-looking secrets in test files.

    Only files whose basename starts with ``test_`` or ``conftest``, or
    contains ``_test.``, are scanned. Lines starting with ``#`` or ``//`` are
    ignored, as are lines containing an obvious placeholder word. Each
    remaining line yields at most one finding, labelled with the first token
    shape that matches.

    Args:
        file_path: Path of the scanned file (used for the test-file check and
            in finding titles).
        content: Full text of the file.

    Returns:
        Findings with ``check_id="test_secret_scan"``; scanning stops once 10
        have been collected. Empty for blank content and non-test files.
    """
    import re

    if not content.strip():
        return []

    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    looks_like_test = (
        basename.startswith(("test_", "conftest")) or "_test." in basename
    )
    if not looks_like_test:
        return []

    # (regex, human-readable label) pairs, tried in order.
    token_shapes = (
        (r'(?:sk|pk)[-_](?:live|test)[-_][a-zA-Z0-9]{20,}', "Stripe-like API key"),
        (r'ghp_[a-zA-Z0-9]{36,}', "GitHub personal access token"),
        (r'gho_[a-zA-Z0-9]{36,}', "GitHub OAuth token"),
        (r'AKIA[A-Z0-9]{16}', "AWS access key ID"),
        (r'eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}', "JWT token"),
        (r'xox[bpsar]-[a-zA-Z0-9-]{20,}', "Slack token"),
        (r'sk-[a-zA-Z0-9]{40,}', "OpenAI API key"),
        (r'AIza[a-zA-Z0-9_-]{35}', "Google API key"),
    )
    # Words that mark a line as deliberately fake data.
    harmless_markers = ("placeholder", "example", "fake", "mock", "dummy", "xxx", "test_key")

    findings: list[GateFinding] = []

    for lineno, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith(("#", "//")):
            continue

        # First matching shape decides the label; None means nothing matched.
        label = next(
            (desc for regex, desc in token_shapes if re.search(regex, text)),
            None,
        )
        if label is not None:
            lowered = text.lower()
            if not any(marker in lowered for marker in harmless_markers):
                detail = f"Possible {label} in test file (line {lineno})"
                findings.append(build_finding(
                    check_id="test_secret_scan",
                    category=GateCategory.TRUTH_BOUNDARY,
                    title=f"[test_secrets] {file_path}:{lineno}",
                    severity=GateSeverity.HIGH,
                    impact=GateImpact.REVISE,
                    summary=detail,
                    recommendation="Replace real secrets with obviously fake placeholders (e.g. 'fake-key-xxx').",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.REPLACE_WITH_FAIL_LOUD.value,
                    executor_action=f"Remove secret from test file at {file_path}:{lineno}",
                ))

        if len(findings) >= 10:
            break

    return findings
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
# ---------------------------------------------------------------------------
|
|
677
|
+
# Cluster 50: Unpinned Dependencies
|
|
678
|
+
# ---------------------------------------------------------------------------
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def assess_unpinned_dependencies(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 50: Detect unpinned dependency versions.

    Two manifest kinds are recognised by basename:

    * ``requirements*.txt`` -- a bare package name (optionally with extras)
      is reported as unpinned; a range/compatible/exclusion specifier without
      any ``==`` is reported as loosely pinned. Inline comments and
      environment markers are ignored when classifying a line. Blank lines,
      comment lines and option lines (starting with ``-``) are skipped.
    * ``package.json`` -- string versions in ``dependencies`` and
      ``devDependencies`` that start with ``^`` or ``~``, or equal ``*``, are
      reported as loosely pinned.

    Args:
        file_path: Path of the manifest (used to pick the scan and in finding
            titles).
        content: Full text of the manifest.

    Returns:
        At most 10 findings with ``check_id="unpinned_dep_scan"``. Empty for
        blank content, other file names, unparsable JSON, or a
        ``package.json`` whose top-level value is not an object.
    """
    import re

    if not content.strip():
        return []

    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    max_findings = 10
    findings: list[GateFinding] = []

    if basename.startswith("requirements") and basename.endswith(".txt"):
        bare_name = re.compile(r'^[a-zA-Z][a-zA-Z0-9._-]*(?:\s*\[[^\]]*\])?$')
        loose_operator = re.compile(r'[<>]=?|~=|!=')

        for i, line in enumerate(content.splitlines(), 1):
            stripped = line.strip()
            if not stripped or stripped.startswith(("#", "-")):
                continue

            # Classify on the requirement alone: drop a trailing " # comment"
            # and any "; marker" so operators inside them cannot hide (or
            # fake) a pin.
            requirement = re.split(r'\s+#', stripped, maxsplit=1)[0]
            requirement = requirement.split(";", 1)[0].strip()

            if bare_name.match(requirement):
                detail = f"Unpinned dependency: '{requirement}' -- add ==X.Y.Z"
                findings.append(build_finding(
                    check_id="unpinned_dep_scan",
                    category=GateCategory.CONTRACT,
                    title=f"[unpinned_dependencies] {file_path}:{i}",
                    severity=GateSeverity.MEDIUM,
                    impact=GateImpact.REVISE,
                    summary=detail,
                    recommendation=f"Pin the dependency with an exact version: `{requirement}==X.Y.Z`.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.FIX_CONTRACT.value,
                    executor_action=f"Pin dependency '{requirement}' at {file_path}:{i}",
                ))
            elif loose_operator.search(requirement) and '==' not in requirement:
                detail = f"Loosely pinned: '{stripped}' -- prefer exact ==X.Y.Z"
                findings.append(build_finding(
                    check_id="unpinned_dep_scan",
                    category=GateCategory.CONTRACT,
                    title=f"[unpinned_dependencies] {file_path}:{i}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=detail,
                    recommendation="Use exact version pinning (`==X.Y.Z`) for reproducible builds.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.FIX_CONTRACT.value,
                    executor_action=f"Pin dependency at {file_path}:{i}",
                ))

            if len(findings) >= max_findings:
                break

    elif basename == "package.json":
        import json as json_mod

        try:
            pkg = json_mod.loads(content)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
        # Valid JSON need not be an object (e.g. "[]"); nothing to scan then.
        if not isinstance(pkg, dict):
            return []

        for section in ("dependencies", "devDependencies"):
            deps = pkg.get(section, {})
            if not isinstance(deps, dict):
                continue
            for name, version in deps.items():
                if not isinstance(version, str):
                    continue
                if not (version.startswith(("^", "~")) or version == "*"):
                    continue
                detail = f"Loosely pinned '{name}': '{version}' in {section}"
                findings.append(build_finding(
                    check_id="unpinned_dep_scan",
                    category=GateCategory.CONTRACT,
                    title=f"[unpinned_dependencies] {file_path}:{section}:{name}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=detail,
                    recommendation=f"Use exact version pinning for '{name}' in {section}.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.FIX_CONTRACT.value,
                    executor_action=f"Pin '{name}' in {section} at {file_path}",
                ))
                # Return (not break) so the cap holds across both sections.
                if len(findings) >= max_findings:
                    return findings

    else:
        return []

    return findings
|