vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,808 @@
|
|
|
1
|
+
"""Code style, quality metrics, and false-positive allowlist. Clusters 21, 22, 24, 25, 26, 28, 29.
|
|
2
|
+
|
|
3
|
+
Clusters:
|
|
4
|
+
21 - Magic Numbers
|
|
5
|
+
22 - Error Message Quality
|
|
6
|
+
24 - Naming Consistency
|
|
7
|
+
25 - Secrets in Code
|
|
8
|
+
26 - TODO/FIXME Tracker
|
|
9
|
+
28 - Log Level Appropriateness
|
|
10
|
+
29 - File Encoding Consistency
|
|
11
|
+
|
|
12
|
+
Also contains the false-positive allowlist infrastructure (AllowlistEntry,
|
|
13
|
+
load_allowlist, revalidate_allowlist, save_allowlist, filter_by_allowlist).
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
from .core import detect_language
|
|
19
|
+
from ...gate_models import (
|
|
20
|
+
EvidenceReference,
|
|
21
|
+
GateCategory,
|
|
22
|
+
GateFinding,
|
|
23
|
+
GateImpact,
|
|
24
|
+
GateSeverity,
|
|
25
|
+
RepairKind,
|
|
26
|
+
)
|
|
27
|
+
from ..common import (
|
|
28
|
+
build_finding,
|
|
29
|
+
collect_constant_container_literal_lines,
|
|
30
|
+
is_section_header_comment,
|
|
31
|
+
)
|
|
32
|
+
from .._ast_helpers import collect_string_constant_line_ranges
|
|
33
|
+
import logging
|
|
34
|
+
_log = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Cluster 25: Secrets in Code
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Secret detectors as ``(regex, description)`` pairs.  ``assess_secrets_in_code``
# applies each regex to one source line at a time with ``re.IGNORECASE`` and
# reports the description of the first pattern that matches.
_SECRET_PATTERNS: tuple[tuple[str, str], ...] = (
    # name = "value" style assignments
    (r"(?:password|passwd|pwd)\s*=\s*['\"][^'\"]{4,}['\"]", "Hardcoded password"),
    (r"(?:api_key|apikey|api_secret)\s*=\s*['\"][^'\"]{8,}['\"]", "Hardcoded API key"),
    (r"(?:secret|token|auth)\s*=\s*['\"][A-Za-z0-9+/=]{16,}['\"]", "Hardcoded secret/token"),
    # provider-specific token shapes
    (r"sk-[a-zA-Z0-9]{20,}", "OpenAI-style API key"),
    (r"ghp_[a-zA-Z0-9]{36,}", "GitHub personal access token"),
    (r"glpat-[a-zA-Z0-9\-]{20,}", "GitLab personal access token"),
    (r"AKIA[0-9A-Z]{16}", "AWS access key ID"),
    # PEM private-key headers.  Besides the bare / RSA / EC / DSA labels this
    # also covers "OPENSSH PRIVATE KEY" and the password-protected
    # "ENCRYPTED PRIVATE KEY" label, which were previously not detected.
    (r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----", "Private key in source"),
    # scheme://user:password@host style URLs
    (r"(?:mongodb|postgres|mysql|redis)://[^'\"\s]+:[^'\"\s]+@", "Database connection string with credentials"),
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def assess_secrets_in_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 25: Detect hardcoded secrets, API keys, and credentials in source code.

    The scan is line based.  A line is ignored when, after stripping leading
    whitespace, it starts with ``#``, ``//`` or ``*``, or when it contains
    (case-insensitively) one of the placeholder markers such as ``example`` or
    ``changeme``.  Every remaining line is tested against ``_SECRET_PATTERNS``
    in order; the first pattern that matches produces one finding and the rest
    are not tried for that line.

    Args:
        file_path: Path of the scanned file; only used in finding text/evidence.
        content: Full text of the file.

    Returns:
        One finding per offending line (CRITICAL severity, BLOCK impact,
        ``allowlist_allowed=False``); an empty list when *content* is blank or
        nothing matches.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    comment_prefixes = ("#", "//", "*")
    placeholder_markers = (
        "example", "placeholder", "xxx", "changeme",
        "your_", "test_key", "<your", "fake",
    )
    # Compile once per call instead of handing raw pattern strings to
    # re.search for every line.
    detectors = [
        (re.compile(pattern, re.IGNORECASE), description)
        for pattern, description in _SECRET_PATTERNS
    ]

    findings: list[GateFinding] = []
    for lineno, line in enumerate(content.splitlines(), 1):
        if line.lstrip().startswith(comment_prefixes):
            continue
        lowered = line.lower()
        if any(marker in lowered for marker in placeholder_markers):
            continue

        # One finding per line is enough: take the first detector that hits.
        description = next(
            (label for regex, label in detectors if regex.search(line)),
            None,
        )
        if description is None:
            continue

        detail = f"{description} (line {lineno})"
        findings.append(build_finding(
            check_id="secrets_scan",
            category=GateCategory.TRUTH_BOUNDARY,
            title=f"[secrets_in_code] {file_path}:{lineno}",
            severity=GateSeverity.CRITICAL,
            impact=GateImpact.BLOCK,
            summary=detail,
            recommendation="Remove hardcoded secret from source. Use environment variables or a secrets manager.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Remove hardcoded secret at {file_path}:{lineno}",
            allowlist_allowed=False,
        ))
    return findings
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# Cluster 21: Magic Numbers
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Integer literals the magic-number scan never reports.
_SAFE_NUMBERS = frozenset((
    # very small values and negative sentinels
    0, 1, 2, 3, 4, 5, -1, -2,
    # round decimal magnitudes
    10, 100, 1000,
    # HTTP status codes
    200, 201, 204, 301, 302, 400, 401, 403, 404, 409, 500, 501, 503,
    # time constants
    60, 120, 300, 3600,
    # powers of two up to 4096
    8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
))

# F9f: the subset that is safe in every context.
_ALWAYS_SAFE_NUMBERS = frozenset((0, 1, 2, -1))

# FP-round2-B (2026-06-28): any integer literal with |value| strictly below
# this bound counts as a benign small constant (terminal widths, ASCII codes,
# byte values, small counts) and is not reported; only larger or unusual
# literals are flagged.
_MAGIC_INT_BOUND = 256

# F9f: lower-case substrings which, when found in a nearby comment, mark a
# literal as a documented fixed count (e.g. "C1..C11", "11 clusters").
_DOCUMENTED_COUNT_MARKERS: tuple[str, ...] = (
    "c1..c",  # "C1..C11"
    "1..n",
    "0..n",
    "clusters",
    "documented count",
    "fixed count",
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _collect_constant_assignment_lines(content: str) -> set[int]:
    """F9f: return line numbers of assignments whose target is a single
    UPPER_CASE Name AND whose RHS is a simple compile-time literal.

    F9f-tighten (2026-04-23): RHS scrutiny.
    Previously ANY UPPER_CASE assignment skipped the entire line, which hid
    magic numbers inside expressions like ``FOO = compute(42)``. We now only
    skip when the RHS is a pure literal (Constant or container-of-Constants).
    If the RHS contains Call / BinOp / Name / Attribute / Subscript / Compare,
    the line stays eligible for magic-number scanning.

    Args:
        content: Python source text.

    Returns:
        Every 1-based line number spanned by a qualifying ``Assign`` or
        ``AnnAssign`` statement (first line through ``end_lineno``).  An empty
        set when *content* cannot be parsed.
    """
    import ast

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 reject NUL bytes in the source
        # with ValueError rather than SyntaxError.  Unparseable input simply
        # yields no constant lines instead of crashing the gate.
        return set()

    def _is_const_name(name: str) -> bool:
        """True for UPPER_CASE identifiers, leading underscores allowed."""
        if not name or not name.isidentifier():
            return False
        # Allow underscore prefix (e.g. _MAX_...) and digits; must be all upper.
        core = name.lstrip("_")
        if not core:
            return False
        return core.upper() == core and any(ch.isalpha() for ch in core)

    def _is_pure_literal_rhs(value: ast.AST) -> bool:
        """True if *value* is a pure literal (no Call / Name / BinOp / etc.)."""
        if isinstance(value, ast.Constant):
            return True
        if isinstance(value, ast.UnaryOp) and isinstance(value.operand, ast.Constant):
            return True  # e.g. ``-1``
        if isinstance(value, (ast.List, ast.Tuple, ast.Set)):
            return all(_is_pure_literal_rhs(elt) for elt in value.elts)
        if isinstance(value, ast.Dict):
            # A ``None`` key marks a ``**mapping`` expansion; its value is a
            # non-literal expression and fails the values check below.
            keys = [key for key in value.keys if key is not None]
            return all(_is_pure_literal_rhs(key) for key in keys) and all(
                _is_pure_literal_rhs(val) for val in value.values
            )
        return False

    lines: set[int] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            # Chained targets (``A = B = 1``) are deliberately not treated as
            # constant definitions.
            if len(node.targets) != 1:
                continue
            target = node.targets[0]
        elif isinstance(node, ast.AnnAssign):
            target = node.target
        else:
            continue

        value = node.value  # None for a bare annotation such as ``FOO: int``
        if value is None or not isinstance(target, ast.Name):
            continue
        if not (_is_const_name(target.id) and _is_pure_literal_rhs(value)):
            continue

        start = node.lineno
        end = getattr(node, "end_lineno", start) or start
        lines.update(range(start, end + 1))
    return lines
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _collect_docstring_ranges_for_magic(content: str) -> list[tuple[int, int]]:
    """F9f: docstring ranges to skip literals inside them. AST-based.

    A docstring here is a string-constant expression that is the first
    statement of a module, function, async function, or class body.

    Args:
        content: Python source text.

    Returns:
        ``(first_line, last_line)`` pairs (1-based, inclusive), one per
        docstring, in ``ast.walk`` order.  An empty list when *content* cannot
        be parsed.
    """
    import ast

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 reject NUL bytes in the source
        # with ValueError rather than SyntaxError.
        return []

    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    ranges: list[tuple[int, int]] = []
    for node in ast.walk(tree):
        if not isinstance(node, docstring_owners):
            continue
        body = getattr(node, "body", None)
        if not body:
            continue  # e.g. an empty module
        first = body[0]
        is_docstring = (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        )
        if not is_docstring:
            continue
        start = first.lineno
        end = getattr(first, "end_lineno", start) or start
        ranges.append((start, end))
    return ranges
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _has_documented_count_marker(lines: list[str], lineno_1based: int) -> bool:
    """F9f: look within 3 preceding non-blank lines for a comment marker
    indicating the literal is a documented count.

    Args:
        lines: All lines of the file, without line terminators.
        lineno_1based: 1-based number of the line holding the literal.  The
            line itself is not inspected, only the lines before it.

    Returns:
        True when one of the (up to three) nearest preceding non-blank lines
        is a ``#`` or ``//`` comment containing, case-insensitively, any entry
        of ``_DOCUMENTED_COUNT_MARKERS``; False otherwise.
    """
    # 0-based index of the previous line.  Clamped to the last available line
    # so a line number beyond the end of *lines* cannot raise IndexError.
    idx = min(lineno_1based - 2, len(lines) - 1)
    inspected = 0
    while idx >= 0 and inspected < 3:
        text = lines[idx].strip().lower()
        idx -= 1
        if not text:
            continue  # blank lines do not count towards the 3-line window
        inspected += 1
        if text.startswith(("#", "//")) and any(
            marker in text for marker in _DOCUMENTED_COUNT_MARKERS
        ):
            return True
    return False
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _literal_is_enclosed(line: str, pos: int, opener: str, closer: str) -> bool:
    """Return True when column *pos* of *line* lies inside an unclosed *opener*.

    Only this single line is considered: every *opener* before *pos* raises the
    nesting depth and every *closer* lowers it (never below zero).
    """
    depth = 0
    for ch in line[:pos]:
        if ch == opener:
            depth += 1
        elif ch == closer:
            depth = max(0, depth - 1)
    return depth > 0


def assess_magic_numbers(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 21: Detect hardcoded numeric literals in business logic.

    F9f refinements:
    - Always skip 0, 1, 2, -1.
    - Skip literals that are the RHS of an UPPER_CASE constant assignment
      (AST-based).
    - Skip literals preceded by a `# C1..CN` / `# N clusters` / similar
      "documented count" comment marker within 3 lines.
    - Skip literals inside docstrings (AST-based).

    The scan is regex based and matches unsigned digit runs, so a leading
    minus sign is never part of the matched literal.

    Args:
        file_path: Path of the scanned file; used for language detection, the
            test-file exemption (``test_*`` / ``conftest*`` basenames) and
            finding text.
        content: Full text of the file.

    Returns:
        At most 20 LOW / WARN findings, one per reported literal.  An empty
        list for unsupported languages, blank content, or test files.
    """
    import re

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []  # NOT_APPLICABLE

    if not content.strip():
        return []  # NOT_APPLICABLE

    basename = file_path.replace("\\", "/").split("/")[-1]
    if basename.startswith(("test_", "conftest")):
        return []  # NOT_APPLICABLE

    max_findings = 20
    is_python = lang == "python"

    # F9f AST pre-pass (Python only — JS/TS fall back to heuristics).
    const_assign_lines: set[int] = set()
    docstring_ranges: list[tuple[int, int]] = []
    if is_python:
        const_assign_lines = _collect_constant_assignment_lines(content)
        docstring_ranges = _collect_docstring_ranges_for_magic(content)

    # Loop-invariant patterns, compiled once.
    number_re = re.compile(r"\b(\d+(?:\.\d+)?)\b")
    upper_assign_re = re.compile(r"^[A-Z_][A-Z_0-9]*\s*[=:]")
    range_or_index_re = re.compile(r"range\s*\(|enumerate\s*\(|\[\s*$")

    all_lines = content.splitlines()

    findings: list[GateFinding] = []
    for i, line in enumerate(all_lines, 1):
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", "//")):
            continue
        # F9f-tighten (2026-04-23): for Python the AST pre-pass
        # (``const_assign_lines``) decides which UPPER_CASE assignments are
        # skipped, because it is precise about the RHS shape.  JS/TS have no
        # AST pre-pass, so the legacy "line starts with UPPER_CASE =/:" regex
        # stays as a best-effort heuristic.
        if not is_python and upper_assign_re.match(stripped):
            continue
        if stripped.startswith(("import ", "from ", "@", '"""', "'''")):
            continue
        if stripped.startswith(("'", '"', 'f"', "f'", 'b"', "b'", 'r"', "r'")):
            continue

        # F9f: line inside docstring range → skip entirely.
        if any(start <= i <= end for start, end in docstring_ranges):
            continue

        # F9f: line is part of an UPPER_CASE constant assignment → skip.
        if i in const_assign_lines:
            continue

        # F9f: documented count marker in preceding 3 lines → skip line.
        if _has_documented_count_marker(all_lines, i):
            continue

        # Lines mentioning ``field(`` never report a literal.  The test does
        # not depend on the individual match, so it is done once per line.
        if "field(" in line:
            continue

        for m in number_re.finditer(line):
            literal = m.group(1)
            try:
                val = float(literal)
                # int() raises OverflowError when an extremely long digit run
                # overflows to infinity; such literals are ignored.
                int_val = int(val) if val == int(val) else None
            except (ValueError, OverflowError):
                continue

            if int_val is not None:
                # F9f: 0, 1, 2, -1 always skipped; plus the explicit safe list
                # (HTTP codes, powers of two up to 4096, time constants).
                if int_val in _ALWAYS_SAFE_NUMBERS or int_val in _SAFE_NUMBERS:
                    continue
                # FP-round2-B (2026-06-28): small integers (terminal widths
                # like 24/80, ASCII codes like 127, byte values, small counts)
                # are overwhelmingly benign, so everything with
                # |int| < _MAGIC_INT_BOUND is suppressed.  Larger constants
                # (86400, 65537, large sizes) stay flagged.
                if -_MAGIC_INT_BOUND < int_val < _MAGIC_INT_BOUND:
                    continue
            elif abs(val) < 1.0:
                # FP-round2-B: sub-unit floats are almost always ratios, poll
                # intervals or probabilities rather than load-bearing magic
                # constants.
                continue

            col = m.start()
            prefix = line[:col]
            # An odd number of quotes before the literal: treat it as being
            # inside a string.
            if prefix.count('"') % 2 == 1 or prefix.count("'") % 2 == 1:
                continue
            # range(/enumerate( or a dangling "[" within the 20 characters
            # right before the literal.
            if range_or_index_re.search(prefix[-20:]):
                continue
            # ``name = <literal>`` anywhere on the line (assignment or keyword
            # argument).
            if re.search(r"\w+\s*=\s*" + re.escape(literal) + r"\b", line):
                continue
            # F9f-tighten (2026-04-23): subscript / slice context such as
            # ``foo[42]``, ``foo[42:]``, ``foo[:42]``, ``foo[10:42]`` is
            # suppressed only when the literal really sits inside ``[...]``;
            # ``if x > 2000:`` must still be flagged.
            if _literal_is_enclosed(line, col, "[", "]"):
                continue
            # Dict-key / dict-value context: ``{42: ...}`` or ``{...: 42}``.
            # Suppress only when braces genuinely enclose the literal.
            if _literal_is_enclosed(line, col, "{", "}"):
                continue

            detail = f"Magic number {literal} at line {i} -- consider naming as a constant"
            findings.append(build_finding(
                check_id="magic_number_scan",
                category=GateCategory.RUNTIME_BEHAVIOR,
                title=f"[magic_numbers] {file_path}:{i}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation=f"Extract magic number {literal} into a named constant.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.REFACTOR.value,
                executor_action=f"Extract magic number at {file_path}:{i} into a named constant",
            ))
        # The cap is checked once per line, so a single line may overshoot;
        # the final slice trims the result.
        if len(findings) >= max_findings:
            break
    return findings[:max_findings]
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
# ---------------------------------------------------------------------------
|
|
424
|
+
# Cluster 22: Error Message Quality
|
|
425
|
+
# ---------------------------------------------------------------------------
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def assess_error_message_quality(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 22: Detect generic/unhelpful error messages in ``raise`` statements.

    Only ``raise`` statements are recognised; none of the patterns matches a
    ``throw``.  A line is reported when, ignoring case, it

    - raises a ``*Error`` whose whole message is a single generic word
      (``error``, ``failed``, ``bad``, ``invalid``, ``wrong``, ``oops``,
      ``problem``, ``issue``),
    - raises ``Exception`` with a message of at most 10 characters, or
    - raises a ``*Error`` / ``Exception`` with no arguments at end of line.

    Lines starting with ``#`` and files whose basename starts with ``test_``
    are skipped.

    Args:
        file_path: Path of the scanned file; used for the test-file exemption
            and finding text.
        content: Full text of the file.

    Returns:
        One LOW / WARN finding per offending line; an empty list when the
        content is blank, the file is a test file, or nothing matches.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    basename = file_path.replace("\\", "/").split("/")[-1]
    if basename.startswith("test_"):
        return []  # NOT_APPLICABLE

    # Compiled once per call rather than searched as raw strings per line.
    generic_patterns = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'raise\s+\w+Error\s*\(\s*["\'](?:error|failed|bad|invalid|wrong|oops|problem|issue)["\']',
            r'raise\s+Exception\s*\(\s*["\'][^"\']{0,10}["\']',
            r'raise\s+\w+Error\s*\(\s*\)\s*$',
            r'raise\s+Exception\s*\(\s*\)\s*$',
        )
    )

    findings: list[GateFinding] = []
    for i, line in enumerate(content.splitlines(), 1):
        stripped = line.strip()
        if stripped.startswith("#"):
            continue
        # A line yields at most one finding, however many patterns match.
        if not any(regex.search(stripped) for regex in generic_patterns):
            continue
        detail = f"Generic error message at line {i}: {stripped[:60]}"
        findings.append(build_finding(
            check_id="error_msg_scan",
            category=GateCategory.REPORTING,
            title=f"[error_message_quality] {file_path}:{i}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Use descriptive error messages that include context (variable values, expected vs actual).",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=f"Improve error message at {file_path}:{i}",
        ))
    return findings
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
# Cluster 24: Naming Consistency
|
|
474
|
+
# ---------------------------------------------------------------------------
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def assess_naming_consistency(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 24: Detect mixed naming conventions (camelCase vs snake_case) in one module.

    Function names are taken from ``def`` / ``async def`` statements (leading
    underscores are ignored).  A name counts as snake_case when it contains no
    upper-case character, and as camelCase when it starts with a lower-case
    letter, is purely alphanumeric and contains an upper-case character.
    Names fitting neither shape are ignored.

    With ``r`` the share of snake_case names among the classified ones:

    - ``r > 0.8``: every camelCase name is reported.
    - ``r < 0.2``: up to 10 snake_case names are reported.
    - ``0.3 <= r <= 0.7``: one module-level "mixed naming" finding.
    - anything else: no finding.

    Args:
        file_path: Path of the scanned file; used for language detection and
            finding text.
        content: Full text of the file.

    Returns:
        LOW / WARN findings as described above; an empty list for unsupported
        languages, blank content, or fewer than three function definitions.
    """
    import re

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []  # NOT_APPLICABLE

    if not content.strip():
        return []  # NOT_APPLICABLE

    # ``async def`` is accepted as well; otherwise coroutine functions would
    # never take part in the convention ratio.
    func_names = re.findall(r"(?:^|\n)\s*(?:async\s+)?def (\w+)\s*\(", content)
    if len(func_names) < 3:
        return []  # NOT_APPLICABLE

    snake: list[str] = []
    camel: list[str] = []
    for raw_name in func_names:
        name = raw_name.lstrip("_")
        if not name:
            continue
        if name == name.lower():
            snake.append(name)
        elif re.match(r"[a-z][a-zA-Z0-9]*$", name) and any(c.isupper() for c in name):
            camel.append(name)

    total = len(snake) + len(camel)
    if total == 0:
        return []  # NOT_APPLICABLE

    snake_ratio = len(snake) / total

    def _finding(title: str, summary: str, recommendation: str, action: str) -> GateFinding:
        """Build one naming finding; all of them share id, category and level."""
        return build_finding(
            check_id="naming_scan",
            category=GateCategory.DRIFT,
            title=title,
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=summary,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=summary, ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=action,
        )

    findings: list[GateFinding] = []
    if snake_ratio > 0.8 and camel:
        for name in camel:
            findings.append(_finding(
                f"[naming_consistency] {file_path}:{name}",
                f"camelCase '{name}' in snake_case-dominant module",
                "Rename to snake_case to match the dominant convention.",
                f"Rename '{name}' to snake_case in {file_path}",
            ))
    elif snake_ratio < 0.2 and snake:
        for name in snake[:10]:
            findings.append(_finding(
                f"[naming_consistency] {file_path}:{name}",
                f"snake_case '{name}' in camelCase-dominant module",
                "Rename to camelCase to match the dominant convention.",
                f"Rename '{name}' to camelCase in {file_path}",
            ))
    elif 0.3 <= snake_ratio <= 0.7:
        findings.append(_finding(
            f"[naming_consistency] {file_path}",
            f"Mixed naming: {len(snake)} snake_case + {len(camel)} camelCase",
            "Standardize on one naming convention (prefer snake_case for Python).",
            f"Standardize naming convention in {file_path}",
        ))
    return findings
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
# ---------------------------------------------------------------------------
|
|
559
|
+
# Cluster 26: TODO/FIXME Tracker
|
|
560
|
+
# ---------------------------------------------------------------------------
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
# Marker words counted as tech-debt annotations.  ``assess_todo_debt`` looks
# for each one as a whole word, ignoring case, and records the first that
# matches on a line.
_TECH_DEBT_MARKERS = (
    "TODO",
    "FIXME",
    "HACK",
    "XXX",
    "TEMP",
    "WORKAROUND",
    "KLUDGE",
)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def assess_todo_debt(
    file_path: str,
    content: str,
    max_per_file: int = 5,
) -> list[GateFinding]:
    """Cluster 26: Track TODO/FIXME/HACK comments as tech debt inventory.

    Every line of ``content`` is searched, case-insensitively and on word
    boundaries, for the entries of ``_TECH_DEBT_MARKERS``. At most one marker
    is recorded per line: the first one, in tuple order, that matches.

    Args:
        file_path: Path used in finding titles, evidence and executor actions.
        content: Full text of the file to scan.
        max_per_file: Number of marker lines tolerated before an additional
            summary finding is emitted.

    Returns:
        An empty list when ``content`` is blank or contains no marker.
        Otherwise one INFO-severity finding per marker line, followed by one
        MEDIUM-severity summary finding when more than ``max_per_file``
        marker lines were found.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    # F14c sub-fix 1: skip lines inside UPPER_CASE module-level container
    # literals (e.g. ``_TECH_DEBT_MARKERS = ("TODO", "FIXME")``). The gate
    # must not self-match on its own marker definitions.
    skip_lines = set(collect_constant_container_literal_lines(content))
    # F14c extra: also skip interior lines of multi-line string constants
    # (docstrings that explain marker patterns). Reuses F14a helper.
    skip_lines |= set(collect_string_constant_line_ranges(content))

    # Compile each marker pattern once instead of rebuilding it for every
    # (line, marker) pair. re.escape keeps the pattern literal even if a
    # marker containing regex metacharacters is ever added to the tuple.
    marker_patterns = tuple(
        (marker, re.compile(rf"\b{re.escape(marker)}\b", re.IGNORECASE))
        for marker in _TECH_DEBT_MARKERS
    )

    found: list[tuple[int, str, str]] = []
    for line_num, line in enumerate(content.splitlines(), 1):
        # F14c sub-fix 2: also skip visual section-header separator comments
        # such as ``# --- section ---`` or ``# === Legacy Debt (C53) ===``.
        if line_num in skip_lines or is_section_header_comment(line):
            continue
        # Tuple order decides which marker is reported for a line.
        hit = next(
            (marker for marker, pattern in marker_patterns if pattern.search(line)),
            None,
        )
        if hit is not None:
            found.append((line_num, hit, line.strip()[:80]))

    if not found:
        return []  # PASS

    # The per-marker evidence is "ok" only while the file stays within budget;
    # this does not depend on the individual marker, so compute it once.
    total = len(found)
    within_budget = total <= max_per_file

    # Always return a finding per marker (info-level)
    findings: list[GateFinding] = []
    for line_num, marker, text in found:
        detail = f"[{marker}] {text}"
        findings.append(build_finding(
            check_id="todo_scan",
            category=GateCategory.REPORTING,
            title=f"[todo_debt] {file_path}:{line_num}",
            severity=GateSeverity.INFO,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Address tech debt marker or convert to a tracked issue.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=within_budget),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=f"Address {marker} at {file_path}:{line_num}",
        ))

    # If over threshold, add a warn-level summary finding
    if not within_budget:
        findings.append(build_finding(
            check_id="todo_scan",
            category=GateCategory.REPORTING,
            title=f"[todo_debt] {file_path}: {total} markers exceed threshold",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=f"{total} tech debt markers in {file_path} (threshold: {max_per_file})",
            recommendation=f"Reduce TODO/FIXME count below {max_per_file} per file.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"{total} markers exceed threshold {max_per_file}", ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=f"Reduce tech debt markers in {file_path}",
        ))
    return findings
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
# ---------------------------------------------------------------------------
|
|
640
|
+
# Cluster 28: Log Level Appropriateness
|
|
641
|
+
# ---------------------------------------------------------------------------
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def assess_log_level_quality(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 28: Detect mismatched log levels vs message severity.

    Each non-comment line is searched for a call of the form
    ``log|logger|_log|logging .<level>(`` whose first argument starts with a
    string literal (optionally ``f``-prefixed). Up to 80 characters following
    the opening quote are lower-cased and used as a message preview.

    Two kinds of mismatch are reported, both as LOW / WARN findings:

    * ``debug``/``info`` calls whose preview contains an error keyword, unless
      it also contains a phrase describing an expected, handled failure.
    * ``error``/``critical`` calls whose preview contains a normal-operation
      keyword and no error keyword.

    Args:
        file_path: Path used in finding titles, evidence and executor actions.
            Files whose basename starts with ``test_`` are not scanned.
        content: Full text of the file to scan.

    Returns:
        A list of findings in line order; empty when ``content`` is blank,
        the file is a ``test_`` file, or nothing is mismatched.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    basename = file_path.replace("\\", "/").split("/")[-1]
    if basename.startswith("test_"):
        return []  # NOT_APPLICABLE

    # All lookup tables and the call pattern are loop-invariant: build once.
    error_keywords = ("error", "fail", "crash", "fatal", "critical", "exception", "broken", "corrupt")
    normal_keywords = ("start", "ready", "success", "loaded", "initialized", "connected", "listening")
    expected_failure_patterns = (
        "failed to", "could not", "unable to", "cannot ",
        "timeout", "timed out", "not found", "not available",
        "skipping", "falling back", "does not exist",
        "missing", "unavailable", "unreachable",
        "ignored", "discarded", "dropped", "closed",
        "no longer", "already ", "stale",
    )
    # Only levels the call pattern below can capture are listed here.
    low_levels = ("debug", "info")
    high_levels = ("error", "critical")

    # The first argument must open with a quote, optionally preceded by an
    # ``f`` prefix. A bare ``[f"']`` class would also accept any identifier
    # starting with "f" (e.g. ``log.info(failure_count)``) as if it were a
    # message literal and produce false positives.
    call_re = re.compile(
        r"\b(?:log(?:ger)?|_log|logging)\.(debug|info|warning|error|critical)\s*\(\s*f?[\"'](.{5,80})",
        re.IGNORECASE,
    )

    findings: list[GateFinding] = []
    for line_num, line in enumerate(content.splitlines(), 1):
        stripped = line.strip()
        if stripped.startswith("#"):
            continue

        match = call_re.search(stripped)
        if match is None:
            continue

        level = match.group(1).lower()
        is_low = level in low_levels
        if not is_low and level not in high_levels:
            continue  # e.g. warning: neither rule applies

        msg_preview = match.group(2).lower()
        mentions_error = any(kw in msg_preview for kw in error_keywords)

        if is_low:
            # Error wording at debug/info is fine when it describes an
            # expected, handled condition ("failed to ... falling back").
            if not mentions_error:
                continue
            if any(pat in msg_preview for pat in expected_failure_patterns):
                continue
            summary = f"log.{level}() with error-severity message at line {line_num}"
            recommendation = "Use log.error() or log.warning() for error-severity messages."
        else:
            if mentions_error:
                continue
            if not any(kw in msg_preview for kw in normal_keywords):
                continue
            summary = f"log.{level}() with normal-severity message at line {line_num}"
            recommendation = "Use log.info() for normal/success messages."

        findings.append(build_finding(
            check_id="log_level_scan",
            category=GateCategory.REPORTING,
            title=f"[log_level_quality] {file_path}:{line_num}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=summary,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=summary, ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=f"Change log level at {file_path}:{line_num}",
        ))
    return findings
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
# ---------------------------------------------------------------------------
|
|
719
|
+
# Cluster 29: File Encoding Consistency
|
|
720
|
+
# ---------------------------------------------------------------------------
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def assess_encoding_consistency(
    file_path: str,
    raw_bytes: bytes,
) -> list[GateFinding]:
    """Cluster 29: Check file encoding, BOM, and line ending consistency.

    Inspects the raw bytes of a file and reports, in this order:

    1. a leading UTF-8 BOM,
    2. content that does not decode as UTF-8,
    3. CRLF and bare LF line endings mixed in the same file,
    4. bare CR characters (not part of a CRLF pair),
    5. a null byte within the first 1000 bytes.

    Args:
        file_path: Path used in finding titles, evidence and executor actions.
        raw_bytes: Undecoded file contents.

    Returns:
        One finding per detected problem; an empty list when ``raw_bytes`` is
        empty or nothing is detected.
    """
    if not raw_bytes:
        return []  # NOT_APPLICABLE

    issues: list[GateFinding] = []

    def _report(tag, severity, impact, summary, recommendation, action):
        # Every finding of this check shares id, category, title prefix and
        # repair kind; the evidence detail repeats the summary text.
        issues.append(build_finding(
            check_id="encoding_scan",
            category=GateCategory.CONTRACT,
            title=f"[encoding_consistency] {file_path}:{tag}",
            severity=severity,
            impact=impact,
            summary=summary,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=summary, ok=False),),
            repair_kind=RepairKind.FIX_ENCODING.value,
            executor_action=action,
        ))

    # 1. UTF-8 byte order mark at the very start of the file.
    if raw_bytes[:3] == b"\xef\xbb\xbf":
        _report(
            "BOM",
            GateSeverity.LOW,
            GateImpact.WARN,
            "UTF-8 BOM detected -- most tools/editors don't need BOM for UTF-8",
            "Remove the UTF-8 BOM from the file.",
            f"Remove BOM from {file_path}",
        )

    # 2. The whole payload must decode as UTF-8.
    try:
        raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        _report(
            "encoding",
            GateSeverity.MEDIUM,
            GateImpact.REVISE,
            "File is not valid UTF-8 -- may be Latin-1 or CP1252",
            "Re-encode the file as UTF-8.",
            f"Re-encode {file_path} as UTF-8",
        )

    # 3./4. Drop every CRLF pair; any LF or CR left over is a bare one.
    without_crlf = raw_bytes.replace(b"\r\n", b"")
    if b"\r\n" in raw_bytes and b"\n" in without_crlf:
        _report(
            "line_endings",
            GateSeverity.LOW,
            GateImpact.WARN,
            "Mixed line endings: both CRLF and LF detected",
            "Normalize to LF line endings.",
            f"Normalize line endings in {file_path}",
        )
    if b"\r" in without_crlf:
        _report(
            "line_endings",
            GateSeverity.LOW,
            GateImpact.WARN,
            "Old Mac-style CR line endings detected",
            "Normalize to LF line endings.",
            f"Normalize line endings in {file_path}",
        )

    # 5. Only the first 1000 bytes are probed for null bytes.
    if raw_bytes.find(b"\x00", 0, 1000) != -1:
        _report(
            "null_bytes",
            GateSeverity.MEDIUM,
            GateImpact.REVISE,
            "Null bytes in file -- may be binary file with text extension",
            "Remove null bytes or use a binary-safe encoding.",
            f"Remove null bytes from {file_path}",
        )
    return issues
|