vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,808 @@
1
+ """Code style, quality metrics, and false-positive allowlist. Clusters 21, 22, 24, 25, 26, 28, 29.
2
+
3
+ Clusters:
4
+ 21 - Magic Numbers
5
+ 22 - Error Message Quality
6
+ 24 - Naming Consistency
7
+ 25 - Secrets in Code
8
+ 26 - TODO/FIXME Tracker
9
+ 28 - Log Level Appropriateness
10
+ 29 - File Encoding Consistency
11
+
12
+ Also contains the false-positive allowlist infrastructure (AllowlistEntry,
13
+ load_allowlist, revalidate_allowlist, save_allowlist, filter_by_allowlist).
14
+ """
15
+ from __future__ import annotations
16
+
17
+
18
+ from .core import detect_language
19
+ from ...gate_models import (
20
+ EvidenceReference,
21
+ GateCategory,
22
+ GateFinding,
23
+ GateImpact,
24
+ GateSeverity,
25
+ RepairKind,
26
+ )
27
+ from ..common import (
28
+ build_finding,
29
+ collect_constant_container_literal_lines,
30
+ is_section_header_comment,
31
+ )
32
+ from .._ast_helpers import collect_string_constant_line_ranges
33
+ import logging
34
+ _log = logging.getLogger(__name__)
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Cluster 25: Secrets in Code
39
+ # ---------------------------------------------------------------------------
40
+
41
+
42
+ _SECRET_PATTERNS: tuple[tuple[str, str], ...] = (
43
+ (r"(?:password|passwd|pwd)\s*=\s*['\"][^'\"]{4,}['\"]", "Hardcoded password"),
44
+ (r"(?:api_key|apikey|api_secret)\s*=\s*['\"][^'\"]{8,}['\"]", "Hardcoded API key"),
45
+ (r"(?:secret|token|auth)\s*=\s*['\"][A-Za-z0-9+/=]{16,}['\"]", "Hardcoded secret/token"),
46
+ (r"sk-[a-zA-Z0-9]{20,}", "OpenAI-style API key"),
47
+ (r"ghp_[a-zA-Z0-9]{36,}", "GitHub personal access token"),
48
+ (r"glpat-[a-zA-Z0-9\-]{20,}", "GitLab personal access token"),
49
+ (r"AKIA[0-9A-Z]{16}", "AWS access key ID"),
50
+ (r"-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----", "Private key in source"),
51
+ (r"(?:mongodb|postgres|mysql|redis)://[^'\"\s]+:[^'\"\s]+@", "Database connection string with credentials"),
52
+ )
53
+
54
+
55
+ def assess_secrets_in_code(
56
+ file_path: str,
57
+ content: str,
58
+ ) -> list[GateFinding]:
59
+ """Cluster 25: Detect hardcoded secrets, API keys, and credentials in source code."""
60
+ import re
61
+
62
+ if not content.strip():
63
+ return [] # NOT_APPLICABLE
64
+
65
+ findings: list[GateFinding] = []
66
+ for i, line in enumerate(content.splitlines(), 1):
67
+ stripped = line.lstrip()
68
+ if stripped.startswith("#") or stripped.startswith("//") or stripped.startswith("*"):
69
+ continue
70
+ if any(marker in line.lower() for marker in ("example", "placeholder", "xxx", "changeme", "your_", "test_key", "<your", "fake")):
71
+ continue
72
+
73
+ for pattern, description in _SECRET_PATTERNS:
74
+ if re.search(pattern, line, re.IGNORECASE):
75
+ findings.append(build_finding(
76
+ check_id="secrets_scan",
77
+ category=GateCategory.TRUTH_BOUNDARY,
78
+ title=f"[secrets_in_code] {file_path}:{i}",
79
+ severity=GateSeverity.CRITICAL,
80
+ impact=GateImpact.BLOCK,
81
+ summary=f"{description} (line {i})",
82
+ recommendation=f"Remove hardcoded secret from source. Use environment variables or a secrets manager.",
83
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"{description} (line {i})", ok=False),),
84
+ repair_kind=RepairKind.FIX_CONTRACT.value,
85
+ executor_action=f"Remove hardcoded secret at {file_path}:{i}",
86
+ allowlist_allowed=False,
87
+ ))
88
+ break # one finding per line is enough
89
+ return findings
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Cluster 21: Magic Numbers
94
+ # ---------------------------------------------------------------------------
95
+
96
+
97
+ _SAFE_NUMBERS = frozenset({
98
+ 0, 1, 2, 3, 4, 5, -1, -2,
99
+ 10, 100, 1000,
100
+ 200, 201, 204, 301, 302, 400, 401, 403, 404, 409, 500, 501, 503,
101
+ 60, 120, 300, 3600,
102
+ 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
103
+ })
104
+
105
+ # F9f: values that are always safe, regardless of context.
106
+ _ALWAYS_SAFE_NUMBERS = frozenset({0, 1, 2, -1})
107
+
108
+ # FP-round2-B (2026-06-28): integer literals with |value| below this bound are
109
+ # treated as benign small constants (terminal widths, ASCII codes, byte values,
110
+ # small counts) and not reported. Only larger / unusual literals are flagged.
111
+ _MAGIC_INT_BOUND = 256
112
+
113
+ # F9f: comment markers that document a fixed count (e.g., "C1..C11", "11 clusters").
114
+ _DOCUMENTED_COUNT_MARKERS: tuple[str, ...] = (
115
+ "c1..c", # "C1..C11"
116
+ "1..n",
117
+ "0..n",
118
+ "clusters",
119
+ "documented count",
120
+ "fixed count",
121
+ )
122
+
123
+
124
+ def _collect_constant_assignment_lines(content: str) -> set[int]:
125
+ """F9f: return line numbers of assignments whose target is a single
126
+ UPPER_CASE Name AND whose RHS is a simple compile-time literal.
127
+
128
+ F9f-tighten (2026-04-23): RHS scrutiny.
129
+ Previously ANY UPPER_CASE assignment skipped the entire line, which hid
130
+ magic numbers inside expressions like ``FOO = compute(42)``. We now only
131
+ skip when the RHS is a pure literal (Constant or container-of-Constants).
132
+ If the RHS contains Call / BinOp / Name / Attribute / Subscript / Compare,
133
+ the line stays eligible for magic-number scanning.
134
+ """
135
+ import ast
136
+
137
+ try:
138
+ tree = ast.parse(content)
139
+ except SyntaxError:
140
+ return set()
141
+
142
+ lines: set[int] = set()
143
+
144
+ def _is_const_name(name: str) -> bool:
145
+ if not name or not name.isidentifier():
146
+ return False
147
+ # Allow underscore prefix (e.g. _MAX_...) and digits; must be all upper.
148
+ stripped = name.lstrip("_")
149
+ if not stripped:
150
+ return False
151
+ return stripped.upper() == stripped and any(ch.isalpha() for ch in stripped)
152
+
153
+ def _is_pure_literal_rhs(value: ast.AST) -> bool:
154
+ """True if *value* is a pure literal (no Call / Name / BinOp / etc.)."""
155
+ if isinstance(value, ast.Constant):
156
+ return True
157
+ if isinstance(value, ast.UnaryOp) and isinstance(value.operand, ast.Constant):
158
+ return True
159
+ if isinstance(value, (ast.List, ast.Tuple, ast.Set)):
160
+ return all(_is_pure_literal_rhs(e) for e in value.elts)
161
+ if isinstance(value, ast.Dict):
162
+ keys = [k for k in value.keys if k is not None]
163
+ return all(_is_pure_literal_rhs(k) for k in keys) and all(
164
+ _is_pure_literal_rhs(v) for v in value.values
165
+ )
166
+ return False
167
+
168
+ for node in ast.walk(tree):
169
+ if isinstance(node, ast.Assign):
170
+ targets = node.targets
171
+ if (
172
+ len(targets) == 1
173
+ and isinstance(targets[0], ast.Name)
174
+ and _is_const_name(targets[0].id)
175
+ and _is_pure_literal_rhs(node.value)
176
+ ):
177
+ start = node.lineno
178
+ end = getattr(node, "end_lineno", start) or start
179
+ for ln in range(start, end + 1):
180
+ lines.add(ln)
181
+ elif isinstance(node, ast.AnnAssign):
182
+ if (
183
+ isinstance(node.target, ast.Name)
184
+ and _is_const_name(node.target.id)
185
+ and node.value is not None
186
+ and _is_pure_literal_rhs(node.value)
187
+ ):
188
+ start = node.lineno
189
+ end = getattr(node, "end_lineno", start) or start
190
+ for ln in range(start, end + 1):
191
+ lines.add(ln)
192
+ return lines
193
+
194
+
195
+ def _collect_docstring_ranges_for_magic(content: str) -> list[tuple[int, int]]:
196
+ """F9f: docstring ranges to skip literals inside them. AST-based."""
197
+ import ast
198
+
199
+ try:
200
+ tree = ast.parse(content)
201
+ except SyntaxError:
202
+ return []
203
+
204
+ ranges: list[tuple[int, int]] = []
205
+ for node in ast.walk(tree):
206
+ if not isinstance(
207
+ node,
208
+ (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef),
209
+ ):
210
+ continue
211
+ body = getattr(node, "body", None)
212
+ if not body:
213
+ continue
214
+ first = body[0]
215
+ if (
216
+ isinstance(first, ast.Expr)
217
+ and isinstance(first.value, ast.Constant)
218
+ and isinstance(first.value.value, str)
219
+ ):
220
+ start = first.lineno
221
+ end = getattr(first, "end_lineno", start) or start
222
+ ranges.append((start, end))
223
+ return ranges
224
+
225
+
226
+ def _has_documented_count_marker(lines: list[str], lineno_1based: int) -> bool:
227
+ """F9f: look within 3 preceding non-blank lines for a comment marker
228
+ indicating the literal is a documented count."""
229
+ inspected = 0
230
+ j = lineno_1based - 2 # 0-based previous line
231
+ while j >= 0 and inspected < 3:
232
+ ln = lines[j].strip()
233
+ if not ln:
234
+ j -= 1
235
+ continue
236
+ inspected += 1
237
+ lowered = ln.lower()
238
+ if lowered.startswith("#") or lowered.startswith("//"):
239
+ for marker in _DOCUMENTED_COUNT_MARKERS:
240
+ if marker in lowered:
241
+ return True
242
+ j -= 1
243
+ return False
244
+
245
+
246
def _literal_enclosed_by(line: str, position: int, opener: str, closer: str) -> bool:
    """Return True when *position* lies inside an unclosed *opener* on *line*.

    Nesting depth is counted over the characters before *position*; a
    *closer* with no matching *opener* is ignored (depth never drops below 0).
    """
    depth = 0
    for ch in line[:position]:
        if ch == opener:
            depth += 1
        elif ch == closer:
            depth = max(0, depth - 1)
    return depth > 0


def assess_magic_numbers(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 21: Detect hardcoded numeric literals in business logic.

    Only Python, JavaScript and TypeScript files are scanned; files whose
    basename starts with ``test_`` or ``conftest`` are skipped.

    Whole lines are skipped when they are blank, comments (``#`` / ``//``),
    imports, decorators, start with a string literal, contain ``field(``,
    or (F9f):
    - lie inside a docstring (Python, AST-based);
    - belong to an UPPER_CASE constant assignment with a pure-literal RHS
      (Python, AST-based; JS/TS use a regex heuristic instead);
    - are preceded by a "documented count" comment marker within 3
      non-blank lines.

    Individual literals are skipped when they:
    - are in ``_ALWAYS_SAFE_NUMBERS`` or ``_SAFE_NUMBERS``;
    - are integers with ``|value| < _MAGIC_INT_BOUND`` (FP-round2-B);
    - are non-integral floats with ``|value| < 1.0`` (FP-round2-B);
    - appear after an odd number of quote characters (likely inside a string);
    - directly follow ``range(`` / ``enumerate(`` or an opening ``[``;
    - appear as ``name = <literal>`` somewhere on the line;
    - are enclosed by ``[...]`` or ``{...}`` on the line (F9f-tighten: a
      trailing statement ``:`` as in ``if x > 2000:`` no longer suppresses).

    Args:
        file_path: Path used for language detection and in findings.
        content: Full text of the file being scanned.

    Returns:
        At most 20 LOW / WARN findings, in source order.
    """
    import re

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []  # NOT_APPLICABLE

    if not content.strip():
        return []  # NOT_APPLICABLE

    basename = file_path.replace("\\", "/").split("/")[-1]
    if basename.startswith(("test_", "conftest")):
        return []  # NOT_APPLICABLE

    # F9f AST pre-pass (Python only -- JS/TS fall back to heuristics).
    is_python = lang == "python"
    const_assign_lines: set[int] = (
        _collect_constant_assignment_lines(content) if is_python else set()
    )
    docstring_ranges: list[tuple[int, int]] = (
        _collect_docstring_ranges_for_magic(content) if is_python else []
    )

    max_findings = 20
    number_re = re.compile(r"\b(\d+(?:\.\d+)?)\b")
    # F9f-tighten (2026-04-23): for Python the AST pre-pass decides which
    # UPPER_CASE assignments are exempt; this regex is the best-effort
    # stand-in for JS/TS, where no AST pre-pass is available.
    upper_assign_re = re.compile(r"^[A-Z_][A-Z_0-9]*\s*[=:]")
    loop_or_index_re = re.compile(r"range\s*\(|enumerate\s*\(|\[\s*$")
    skipped_prefixes = (
        "import ", "from ", "@", '"""', "'''",
        "'", '"', 'f"', "f'", 'b"', "b'", 'r"', "r'",
    )

    all_lines = content.splitlines()

    findings: list[GateFinding] = []
    for i, line in enumerate(all_lines, 1):
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", "//")):
            continue
        if not is_python and upper_assign_re.match(stripped):
            continue
        if stripped.startswith(skipped_prefixes):
            continue
        # F9f: line inside docstring range -> skip entirely.
        if any(start <= i <= end for start, end in docstring_ranges):
            continue
        # F9f: line is part of an UPPER_CASE constant assignment -> skip.
        if i in const_assign_lines:
            continue
        # F9f: documented count marker in preceding 3 lines -> skip line.
        if _has_documented_count_marker(all_lines, i):
            continue
        # Line-level check, so evaluated once rather than per literal.
        if "field(" in line:
            continue

        for m in number_re.finditer(line):
            literal = m.group(1)
            try:
                val = float(literal)
                int_val = int(val) if val == int(val) else None
            except (ValueError, OverflowError):
                continue

            if int_val is not None:
                if int_val in _ALWAYS_SAFE_NUMBERS or int_val in _SAFE_NUMBERS:
                    continue
                # FP-round2-B: small integers are overwhelmingly benign;
                # only |int| >= _MAGIC_INT_BOUND stays eligible.
                if -_MAGIC_INT_BOUND < int_val < _MAGIC_INT_BOUND:
                    continue
            elif abs(val) < 1.0:
                # FP-round2-B: sub-unit floats (ratios, poll intervals,
                # probabilities) are suppressed conservatively.
                continue

            col = m.start()
            pre = line[:col]
            # Odd quote count before the literal: probably inside a string.
            if pre.count('"') % 2 == 1 or pre.count("'") % 2 == 1:
                continue
            if loop_or_index_re.search(pre[-20:]):
                continue
            if re.search(r"\w+\s*=\s*" + re.escape(literal) + r"\b", line):
                continue
            # Subscript / slice context: foo[42], foo[42:], foo[:42], foo[10:42].
            if _literal_enclosed_by(line, col, "[", "]"):
                continue
            # Dict-key / dict-value context: {42: ...} or {...: 42}.
            if _literal_enclosed_by(line, col, "{", "}"):
                continue

            detail = f"Magic number {literal} at line {i} -- consider naming as a constant"
            findings.append(build_finding(
                check_id="magic_number_scan",
                category=GateCategory.RUNTIME_BEHAVIOR,
                title=f"[magic_numbers] {file_path}:{i}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation=f"Extract magic number {literal} into a named constant.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.REFACTOR.value,
                executor_action=f"Extract magic number at {file_path}:{i} into a named constant",
            ))
            if len(findings) >= max_findings:
                # Cap reached: stop scanning instead of collecting more
                # findings that would be discarded.
                return findings
    return findings
421
+
422
+
423
+ # ---------------------------------------------------------------------------
424
+ # Cluster 22: Error Message Quality
425
+ # ---------------------------------------------------------------------------
426
+
427
+
428
def assess_error_message_quality(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 22: flag ``raise`` statements with generic or missing messages.

    A line is reported (once) when, case-insensitively, it matches any of:
    - ``raise <Something>Error("<word>")`` where the word is one of error,
      failed, bad, invalid, wrong, oops, problem, issue;
    - ``raise Exception("...")`` with a message of at most 10 characters;
    - ``raise <Something>Error()`` or ``raise Exception()`` with no arguments.

    Lines starting with ``#`` are ignored, as are files whose basename
    starts with ``test_``.

    Args:
        file_path: Path used in finding titles and evidence.
        content: Full text of the file being scanned.

    Returns:
        One LOW / WARN finding per offending line.
    """
    import re

    if not content.strip():
        return []  # NOT_APPLICABLE

    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith("test_"):
        return []  # NOT_APPLICABLE

    generic_patterns = tuple(
        re.compile(source, re.IGNORECASE)
        for source in (
            r'raise\s+\w+Error\s*\(\s*["\'](?:error|failed|bad|invalid|wrong|oops|problem|issue)["\']',
            r'raise\s+Exception\s*\(\s*["\'][^"\']{0,10}["\']',
            r'raise\s+\w+Error\s*\(\s*\)\s*$',
            r'raise\s+Exception\s*\(\s*\)\s*$',
        )
    )

    findings: list[GateFinding] = []
    for lineno, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith("#"):
            continue
        if not any(pattern.search(text) for pattern in generic_patterns):
            continue
        detail = f"Generic error message at line {lineno}: {text[:60]}"
        findings.append(build_finding(
            check_id="error_msg_scan",
            category=GateCategory.REPORTING,
            title=f"[error_message_quality] {file_path}:{lineno}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Use descriptive error messages that include context (variable values, expected vs actual).",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=f"Improve error message at {file_path}:{lineno}",
        ))
    return findings
470
+
471
+
472
+ # ---------------------------------------------------------------------------
473
+ # Cluster 24: Naming Consistency
474
+ # ---------------------------------------------------------------------------
475
+
476
+
477
def assess_naming_consistency(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 24: flag modules that mix snake_case and camelCase function names.

    Names are taken from ``def <name>(`` lines; at least three are required.
    After stripping leading underscores, a name counts as snake_case when it
    equals its lower-cased form, and as camelCase when it starts with a
    lower-case letter, is alphanumeric only, and contains an upper-case
    letter. Other names are ignored.

    With ``ratio = snake / (snake + camel)``:
    - ``ratio > 0.8``: one finding per camelCase name;
    - ``ratio < 0.2``: one finding per snake_case name (first 10 only);
    - ``0.3 <= ratio <= 0.7``: a single "mixed naming" finding;
    - otherwise: no findings.

    Args:
        file_path: Path used for language detection and in findings.
        content: Full text of the file being scanned.

    Returns:
        LOW / WARN findings as described above; empty when not applicable.
    """
    import re

    if detect_language(file_path) not in ("python", "javascript", "typescript"):
        return []  # NOT_APPLICABLE

    if not content.strip():
        return []  # NOT_APPLICABLE

    declared = re.findall(r"(?:^|\n)\s*def (\w+)\s*\(", content)
    if len(declared) < 3:
        return []  # NOT_APPLICABLE

    snake: list[str] = []
    camel: list[str] = []
    for raw_name in declared:
        name = raw_name.lstrip("_")
        if not name:
            continue
        if name == name.lower():
            snake.append(name)
        elif re.match(r"[a-z][a-zA-Z0-9]*$", name) and any(c.isupper() for c in name):
            camel.append(name)

    total = len(snake) + len(camel)
    if total == 0:
        return []  # NOT_APPLICABLE

    snake_ratio = len(snake) / total

    def _report(title: str, detail: str, recommendation: str, action: str) -> GateFinding:
        # All naming findings share everything except these four texts.
        return build_finding(
            check_id="naming_scan",
            category=GateCategory.DRIFT,
            title=title,
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REFACTOR.value,
            executor_action=action,
        )

    if snake_ratio > 0.8 and camel:
        return [
            _report(
                f"[naming_consistency] {file_path}:{name}",
                f"camelCase '{name}' in snake_case-dominant module",
                "Rename to snake_case to match the dominant convention.",
                f"Rename '{name}' to snake_case in {file_path}",
            )
            for name in camel
        ]

    if snake_ratio < 0.2 and snake:
        return [
            _report(
                f"[naming_consistency] {file_path}:{name}",
                f"snake_case '{name}' in camelCase-dominant module",
                "Rename to camelCase to match the dominant convention.",
                f"Rename '{name}' to camelCase in {file_path}",
            )
            for name in snake[:10]
        ]

    if 0.3 <= snake_ratio <= 0.7:
        return [
            _report(
                f"[naming_consistency] {file_path}",
                f"Mixed naming: {len(snake)} snake_case + {len(camel)} camelCase",
                "Standardize on one naming convention (prefer snake_case for Python).",
                f"Standardize naming convention in {file_path}",
            )
        ]

    return []
556
+
557
+
558
+ # ---------------------------------------------------------------------------
559
+ # Cluster 26: TODO/FIXME Tracker
560
+ # ---------------------------------------------------------------------------
561
+
562
+
563
+ _TECH_DEBT_MARKERS = ("TODO", "FIXME", "HACK", "XXX", "TEMP", "WORKAROUND", "KLUDGE")
564
+
565
+
566
+ def assess_todo_debt(
567
+ file_path: str,
568
+ content: str,
569
+ max_per_file: int = 5,
570
+ ) -> list[GateFinding]:
571
+ """Cluster 26: Track TODO/FIXME/HACK comments as tech debt inventory.
572
+
573
+ Individual TODOs are info findings. More than max_per_file = warn finding.
574
+ Returns findings for each marker found (as info-level), plus a warn finding
575
+ if count exceeds threshold.
576
+ """
577
+ import re
578
+
579
+ if not content.strip():
580
+ return [] # NOT_APPLICABLE
581
+
582
+ # F14c sub-fix 1: skip lines inside UPPER_CASE module-level container
583
+ # literals (e.g. ``_TECH_DEBT_MARKERS = ("TODO", "FIXME")``). The gate
584
+ # must not self-match on its own marker definitions.
585
+ skip_lines = set(collect_constant_container_literal_lines(content))
586
+ # F14c extra: also skip interior lines of multi-line string constants
587
+ # (docstrings that explain marker patterns). Reuses F14a helper.
588
+ skip_lines |= set(collect_string_constant_line_ranges(content))
589
+
590
+ found: list[tuple[int, str, str]] = []
591
+ for i, line in enumerate(content.splitlines(), 1):
592
+ if i in skip_lines:
593
+ continue
594
+ # F14c sub-fix 2: skip visual section-header separator comments such
595
+ # as ``# --- section ---`` or ``# === Legacy Debt (C53) ===``.
596
+ if is_section_header_comment(line):
597
+ continue
598
+ for marker in _TECH_DEBT_MARKERS:
599
+ if re.search(rf"\b{marker}\b", line, re.IGNORECASE):
600
+ found.append((i, marker, line.strip()[:80]))
601
+ break
602
+
603
+ if not found:
604
+ return [] # PASS
605
+
606
+ # Always return a finding per marker (info-level)
607
+ findings: list[GateFinding] = []
608
+ for line_num, marker, text in found:
609
+ findings.append(build_finding(
610
+ check_id="todo_scan",
611
+ category=GateCategory.REPORTING,
612
+ title=f"[todo_debt] {file_path}:{line_num}",
613
+ severity=GateSeverity.INFO,
614
+ impact=GateImpact.WARN,
615
+ summary=f"[{marker}] {text}",
616
+ recommendation="Address tech debt marker or convert to a tracked issue.",
617
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"[{marker}] {text}", ok=len(found) <= max_per_file),),
618
+ repair_kind=RepairKind.REFACTOR.value,
619
+ executor_action=f"Address {marker} at {file_path}:{line_num}",
620
+ ))
621
+
622
+ # If over threshold, add a warn-level summary finding
623
+ if len(found) > max_per_file:
624
+ findings.append(build_finding(
625
+ check_id="todo_scan",
626
+ category=GateCategory.REPORTING,
627
+ title=f"[todo_debt] {file_path}: {len(found)} markers exceed threshold",
628
+ severity=GateSeverity.MEDIUM,
629
+ impact=GateImpact.REVISE,
630
+ summary=f"{len(found)} tech debt markers in {file_path} (threshold: {max_per_file})",
631
+ recommendation=f"Reduce TODO/FIXME count below {max_per_file} per file.",
632
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"{len(found)} markers exceed threshold {max_per_file}", ok=False),),
633
+ repair_kind=RepairKind.REFACTOR.value,
634
+ executor_action=f"Reduce tech debt markers in {file_path}",
635
+ ))
636
+ return findings
637
+
638
+
639
+ # ---------------------------------------------------------------------------
640
+ # Cluster 28: Log Level Appropriateness
641
+ # ---------------------------------------------------------------------------
642
+
643
+
644
+ def assess_log_level_quality(
645
+ file_path: str,
646
+ content: str,
647
+ ) -> list[GateFinding]:
648
+ """Cluster 28: Detect mismatched log levels vs message severity."""
649
+ import re
650
+
651
+ if not content.strip():
652
+ return [] # NOT_APPLICABLE
653
+
654
+ basename = file_path.replace("\\", "/").split("/")[-1]
655
+ if basename.startswith("test_"):
656
+ return [] # NOT_APPLICABLE
657
+
658
+ _ERROR_KEYWORDS = ("error", "fail", "crash", "fatal", "critical", "exception", "broken", "corrupt")
659
+ _DEBUG_LEVELS = ("debug", "trace")
660
+ _INFO_LEVELS = ("info",)
661
+
662
+ findings: list[GateFinding] = []
663
+ for i, line in enumerate(content.splitlines(), 1):
664
+ stripped = line.strip()
665
+ if stripped.startswith("#"):
666
+ continue
667
+
668
+ m = re.search(r"\b(?:log(?:ger)?|_log|logging)\.(debug|info|warning|error|critical)\s*\(\s*[f\"'](.{5,80})", stripped, re.IGNORECASE)
669
+ if not m:
670
+ continue
671
+
672
+ level = m.group(1).lower()
673
+ msg_preview = m.group(2).lower()
674
+
675
+ if level in _DEBUG_LEVELS or level in _INFO_LEVELS:
676
+ if any(kw in msg_preview for kw in _ERROR_KEYWORDS):
677
+ _EXPECTED_FAILURE_PATTERNS = (
678
+ "failed to", "could not", "unable to", "cannot ",
679
+ "timeout", "timed out", "not found", "not available",
680
+ "skipping", "falling back", "does not exist",
681
+ "missing", "unavailable", "unreachable",
682
+ "ignored", "discarded", "dropped", "closed",
683
+ "no longer", "already ", "stale",
684
+ )
685
+ if any(pat in msg_preview for pat in _EXPECTED_FAILURE_PATTERNS):
686
+ continue
687
+ findings.append(build_finding(
688
+ check_id="log_level_scan",
689
+ category=GateCategory.REPORTING,
690
+ title=f"[log_level_quality] {file_path}:{i}",
691
+ severity=GateSeverity.LOW,
692
+ impact=GateImpact.WARN,
693
+ summary=f"log.{level}() with error-severity message at line {i}",
694
+ recommendation=f"Use log.error() or log.warning() for error-severity messages.",
695
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"log.{level}() with error-severity message at line {i}", ok=False),),
696
+ repair_kind=RepairKind.REFACTOR.value,
697
+ executor_action=f"Change log level at {file_path}:{i}",
698
+ ))
699
+
700
+ if level in ("error", "critical"):
701
+ _NORMAL_KEYWORDS = ("start", "ready", "success", "loaded", "initialized", "connected", "listening")
702
+ if any(kw in msg_preview for kw in _NORMAL_KEYWORDS) and not any(kw in msg_preview for kw in _ERROR_KEYWORDS):
703
+ findings.append(build_finding(
704
+ check_id="log_level_scan",
705
+ category=GateCategory.REPORTING,
706
+ title=f"[log_level_quality] {file_path}:{i}",
707
+ severity=GateSeverity.LOW,
708
+ impact=GateImpact.WARN,
709
+ summary=f"log.{level}() with normal-severity message at line {i}",
710
+ recommendation=f"Use log.info() for normal/success messages.",
711
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=f"log.{level}() with normal-severity message at line {i}", ok=False),),
712
+ repair_kind=RepairKind.REFACTOR.value,
713
+ executor_action=f"Change log level at {file_path}:{i}",
714
+ ))
715
+ return findings
716
+
717
+
718
+ # ---------------------------------------------------------------------------
719
+ # Cluster 29: File Encoding Consistency
720
+ # ---------------------------------------------------------------------------
721
+
722
+
723
def assess_encoding_consistency(
    file_path: str,
    raw_bytes: bytes,
) -> list[GateFinding]:
    """Cluster 29: Check file encoding, BOM, and line ending consistency.

    Checks run in a fixed order and each may add one finding:

    1. a UTF-8 BOM at the start of the file;
    2. bytes that do not decode as UTF-8;
    3. CRLF and bare LF line endings in the same file;
    4. bare CR characters (CR not followed by LF);
    5. a null byte within the first 1000 bytes.

    Args:
        file_path: Path used in finding titles and evidence.
        raw_bytes: Undecoded file content.

    Returns:
        The findings in the order listed above; empty when *raw_bytes* is
        empty or no check fires.
    """
    if not raw_bytes:
        return []  # NOT_APPLICABLE

    def _issue(tag, severity, impact, message, advice, action):
        """Build one encoding finding; *message* is both summary and evidence detail."""
        probe = EvidenceReference(kind="probe", path=file_path, detail=message, ok=False)
        return build_finding(
            check_id="encoding_scan",
            category=GateCategory.CONTRACT,
            title=f"[encoding_consistency] {file_path}:{tag}",
            severity=severity,
            impact=impact,
            summary=message,
            recommendation=advice,
            evidence=(probe,),
            repair_kind=RepairKind.FIX_ENCODING.value,
            executor_action=action,
        )

    report: list[GateFinding] = []

    # 1. Byte-order mark.
    if raw_bytes[:3] == b"\xef\xbb\xbf":
        report.append(_issue(
            "BOM",
            GateSeverity.LOW,
            GateImpact.WARN,
            "UTF-8 BOM detected -- most tools/editors don't need BOM for UTF-8",
            "Remove the UTF-8 BOM from the file.",
            f"Remove BOM from {file_path}",
        ))

    # 2. UTF-8 validity; the decoded text itself is not needed.
    try:
        raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        report.append(_issue(
            "encoding",
            GateSeverity.MEDIUM,
            GateImpact.REVISE,
            "File is not valid UTF-8 -- may be Latin-1 or CP1252",
            "Re-encode the file as UTF-8.",
            f"Re-encode {file_path} as UTF-8",
        ))

    # 3/4. Line endings: drop every CRLF pair, then whatever CR or LF is left
    # over must be a bare one.
    without_crlf = raw_bytes.replace(b"\r\n", b"")
    mixed_crlf_and_lf = b"\r\n" in raw_bytes and b"\n" in without_crlf
    if mixed_crlf_and_lf:
        report.append(_issue(
            "line_endings",
            GateSeverity.LOW,
            GateImpact.WARN,
            "Mixed line endings: both CRLF and LF detected",
            "Normalize to LF line endings.",
            f"Normalize line endings in {file_path}",
        ))
    if b"\r" in without_crlf:
        report.append(_issue(
            "line_endings",
            GateSeverity.LOW,
            GateImpact.WARN,
            "Old Mac-style CR line endings detected",
            "Normalize to LF line endings.",
            f"Normalize line endings in {file_path}",
        ))

    # 5. Null bytes, sampled from the head of the file only.
    head = raw_bytes[:1000]
    if b"\x00" in head:
        report.append(_issue(
            "null_bytes",
            GateSeverity.MEDIUM,
            GateImpact.REVISE,
            "Null bytes in file -- may be binary file with text extension",
            "Remove null bytes or use a binary-safe encoding.",
            f"Remove null bytes from {file_path}",
        ))
    return report