vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,629 @@
1
+ """AST-based helpers shared across line-based forensic gates.
2
+
3
+ Motivation (F14a, 2026-04-23)
4
+ ----------------------------
5
+ Several "AST-sounding" gates (``test_quality_scan``, ``dead_code_scan``,
6
+ ``unreachable_scan``) are implemented as line-based regex scans over
7
+ ``content.splitlines()``. Those scans cannot distinguish between real Python
8
+ source and source that appears *inside a string literal* (test fixtures,
9
+ embedded code examples in docstrings, scripted-generation tests, etc.).
10
+
11
+ Example false positive
12
+ ~~~~~~~~~~~~~~~~~~~~~~
13
+ Inside a test file::
14
+
15
+ FIXTURE_CODE = '''
16
+ def func_c():
17
+ return x
18
+ dead_line()
19
+ '''
20
+
21
+ The line-based ``unreachable_scan`` regex saw ``return x`` on one line
22
+ followed by statements on the next line at the same indent and flagged
23
+ the fixture body as unreachable code. But those bytes are a string literal,
24
+ not real code.
25
+
26
+ Fix shape
27
+ ~~~~~~~~~
28
+ ``collect_string_constant_line_ranges(source)`` parses the source once via
29
+ ``ast.parse`` and returns the set of 1-based line numbers that lie strictly
30
+ between the first and last line of a multi-line string ``Constant`` or
31
+ ``JoinedStr`` (f-string) node. Line-based gates then skip matches whose line is in this set.
32
+
33
+ The helper is AST-only — no regex over source text — and degrades gracefully
34
+ to an empty ``frozenset()`` when ``ast.parse`` raises ``SyntaxError``, which
35
+ preserves prior gate behavior for unparseable files. Non-Python files are
36
+ expected to be rejected upstream by ``detect_language(...) != "python"``;
37
+ calling this helper with non-Python source is not an error but will almost
38
+ certainly fail to parse and produce an empty result (safe default).
39
+
40
+ The helper is **intentionally** conservative in what it excludes:
41
+
42
+ * Only the lines spanned by ``ast.Constant(value=str)`` and ``ast.JoinedStr``.
43
+ * Docstrings are ``ast.Expr`` statements wrapping a ``Constant(str)``, so they are covered already.
44
+ * Byte strings (``b"..."``) are ``Constant(value=bytes)`` and are NOT
45
+ excluded — they cannot host Python source interpretation anyway.
46
+ * String-typed *annotations* (forward refs) are string constants but they
47
+ span a single token; excluding them causes no false negatives because
48
+ they do not contain statement-level code.
49
+
50
+ No reachable code ever lives inside a ``Constant(str)`` or ``JoinedStr``,
51
+ so false-negative risk is zero by construction.
52
+ """
53
+ from __future__ import annotations
54
+
55
+ import ast
56
+ import hashlib
57
+ import re
58
+ from functools import lru_cache
59
+ from typing import Callable, Optional
60
+
61
+ from vigil_forensic._shared import (
62
+ EvidenceReference,
63
+ GateCategory,
64
+ GateFinding,
65
+ GateImpact,
66
+ GateSeverity,
67
+ RepairKind,
68
+ )
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # F14c: Detector self-match suppression helpers
73
+ # ---------------------------------------------------------------------------
74
+ # Shared by text-scanning gates (todo_scan,
75
+ # legacy_compat_debt.stale_migration_marker, debug_print_scan) to avoid
76
+ # "detector self-match" false positives where a gate finds its own pattern
77
+ # definitions in its own source.
78
+
79
+ _UPPER_NAME_RE = re.compile(r"^_?[A-Z][A-Z0-9_]*$")
80
+
81
+ # A comment line used as a visual section separator:
82
+ # # --- section ---
83
+ # # === Legacy Debt (C53) ===
84
+ # # ----- DEBUG -----
85
+ # # -- legacy_debt (C53) --
86
+ # Regular prose comments never match.
87
+ _SECTION_HEADER_COMMENT_RE = re.compile(
88
+ r"""
89
+ ^\s*\#\s*
90
+ (?:
91
+ (?:[-=]{2,})\s*\S.*?\s*(?:[-=]{2,})?
92
+ |
93
+ \S.*?\s*[-=]{2,}
94
+ )
95
+ \s*$
96
+ """,
97
+ re.VERBOSE,
98
+ )
99
+
100
+ # F14c sub-fix 3: files where ``print()`` is legitimate CLI output.
101
+ _CLI_SURFACE_FILE_PREFIXES: tuple[str, ...] = (
102
+ "INTERFACE/cli/",
103
+ )
104
+
105
+ _CLI_SURFACE_FILE_EXACT: frozenset[str] = frozenset({
106
+ "BRAIN/autoforensics/self_audit.py",
107
+ "BRAIN/autoforensics/cli_forensic_audit.py",
108
+ # Protocol-layer output helper — safe_print() wraps print(); flagging its
109
+ # own implementation is a false positive.
110
+ "SYSTEM/execution/pocketcoder_command.py",
111
+ # CLI dispatch entry point — cmd_list() renders project table to stdout;
112
+ # this is user-facing output, not a debug print.
113
+ "SYSTEM/runtime/app.py",
114
+ # Test runner utility — progress banners printed to stdout for operator
115
+ # visibility; not production code.
116
+ "SYSTEM/dev/tests/run_all_stress_tests.py",
117
+ # Map Builder CLI entry — cmd_map_invariants() + _print_reports() emit
118
+ # invariant results to stdout; Category D user-facing output.
119
+ "BRAIN/autoforensics/map_builder/invariant_suite.py",
120
+ })
121
+
122
+ # Filename suffixes that mark a file as a user-facing CLI entrypoint.
123
+ # Convention: ``<feature>/cli_entry.py`` exposes a ``cmd_*`` dispatcher for
124
+ # the Vigil app parser and prints human-readable progress/status.
125
+ _CLI_SURFACE_FILE_SUFFIXES: tuple[str, ...] = (
126
+ "/cli_entry.py",
127
+ )
128
+
129
+ _CLI_FUNC_NAMES: frozenset[str] = frozenset({
130
+ "main", "_main", "cli_main", "_cli_main", "run", "cli", "_cli",
131
+ })
132
+
133
+
134
+ __all__ = [
135
+ "collect_string_constant_line_ranges",
136
+ "line_is_inside_string_constant",
137
+ "collect_constant_container_literal_lines",
138
+ "collect_print_call_line_nums",
139
+ "collect_cli_output_func_line_ranges",
140
+ "is_section_header_comment",
141
+ "is_cli_surface_file",
142
+ "collect_main_block_line_ranges",
143
+ "line_in_ranges",
144
+ "parse_python_source_or_emit_finding",
145
+ "build_syntax_parse_error_finding",
146
+ ]
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # B2 (2026-04-23) -- defensive meta-integrity
151
+ # ---------------------------------------------------------------------------
152
+ # Rationale
153
+ # Historically every "AST gate" in autoforensics opened with:
154
+ # try:
155
+ # tree = ast.parse(source)
156
+ # except SyntaxError:
157
+ # return [] # or ``continue``
158
+ # A real SyntaxError in production code is a REAL BUG, but that try/except
159
+ # made the gate *blind* -- zero findings emitted => file looks clean.
160
+ #
161
+ # Fix shape
162
+ # ``parse_python_source_or_emit_finding`` is a drop-in replacement for the
163
+ # silent try/except. On SyntaxError it calls the caller-supplied
164
+ # ``emit_finding`` hook with a ``meta.syntax_parse_error`` finding, then
165
+ # returns ``None`` so the caller preserves its own control flow.
166
+ #
167
+ # Do NOT use this for helpers that are BY DESIGN syntax-tolerant
168
+ # (``collect_string_constant_line_ranges``, ``collect_main_block_line_ranges``,
169
+ # ``collect_constant_container_literal_lines``) -- those fall back to empty
170
+ # results on purpose and must remain silent.
171
+
172
+ _PYTHON_EXTENSIONS: frozenset[str] = frozenset({".py", ".pyi"})
173
+
174
+
175
def _looks_like_python_path(rel_path: str) -> bool:
    """Report whether ``rel_path`` ends in a known Python source extension.

    The path is lower-cased and backslashes are normalized to ``/`` first.
    Everything from the last ``.`` to the end of the path is treated as the
    extension and looked up in ``_PYTHON_EXTENSIONS``. Empty paths and paths
    without any ``.`` are never Python.
    """
    if not rel_path:
        return False
    lowered = rel_path.replace("\\", "/").lower()
    _head, dot, tail = lowered.rpartition(".")
    # An empty separator means the path contains no "." at all.
    return bool(dot) and (dot + tail) in _PYTHON_EXTENSIONS
184
+
185
+
186
def build_syntax_parse_error_finding(
    *,
    rel_path: str,
    exc: SyntaxError,
    emitting_gate: str = "",
) -> GateFinding:
    """Construct the canonical ``meta.syntax_parse_error`` GateFinding.

    Separated from :func:`parse_python_source_or_emit_finding` so tests can
    assert shape without spinning up a parser.

    Args:
        rel_path: Path of the file that failed to parse; used in the title,
            summary, evidence and fingerprint.
        exc: The ``SyntaxError`` describing the failure. ``lineno`` and
            ``msg`` may be missing; placeholder text is used in that case.
        emitting_gate: Optional name of the gate that hit the error; when
            non-empty it is appended to the summary.

    Returns:
        A HIGH-severity ``GateFinding`` in the META category with
        ``allowlist_allowed=False``.
    """
    line_info = f"line {exc.lineno}" if exc.lineno else "unknown line"
    msg = str(exc.msg) if exc.msg else "unknown parse error"
    # Never render "path:None" in the summary when no line number is known.
    location = f"{rel_path}:{exc.lineno}" if exc.lineno else rel_path
    evidence = (
        EvidenceReference(
            kind="syntax_error",
            path=rel_path,
            detail=f"{line_info}: {msg}"[:512],
        ),
    )
    # Deterministic fingerprint over (path, line): when two gates each emit
    # meta.syntax_parse_error for the same file and line they share a
    # fingerprint, so self-audit dedup can collapse them downstream.
    fp_source = f"meta.syntax_parse_error|{rel_path}|{exc.lineno}"
    fingerprint = hashlib.sha256(fp_source.encode("utf-8")).hexdigest()[:16]
    emitter_tag = f" [emitted by {emitting_gate}]" if emitting_gate else ""
    return GateFinding(
        check_id="meta.syntax_parse_error",
        category=GateCategory.META,
        title=f"Python syntax error in {rel_path} ({line_info})",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"{location}: {msg}. Autoforensics gate could not "
            f"parse this file and skipped its checks for this path.{emitter_tag}"
        ),
        recommendation=(
            "Fix the Python syntax error so gates can parse and audit this "
            "file. A silent skip hides real bugs from the audit."
        ),
        evidence=evidence,
        fingerprint=fingerprint,
        repair_kind=RepairKind.FIX_SYNTAX.value,
        executor_action="fix Python syntax error",
        proof_required="ast.parse succeeds on the file",
        allowlist_allowed=False,
        preferred_fix_shape="restore valid Python grammar; do not silence via except",
    )
234
+
235
+
236
def parse_python_source_or_emit_finding(
    source: str,
    *,
    rel_path: str,
    emit_finding: Optional[Callable[[GateFinding], None]] = None,
    emitting_gate: str = "",
    filename: str | None = None,
) -> ast.Module | None:
    """Parse Python source and return the AST module, or emit a meta finding.

    Behavior:
    * Empty ``source``: returns ``None`` without parsing.
    * On success: returns the ``ast.Module``.
    * On ``SyntaxError`` -- or ``ValueError``, which ``ast.parse`` can raise
      for input such as source containing a null byte: if ``emit_finding``
      was provided and ``rel_path`` has a Python extension, calls it with a
      ``meta.syntax_parse_error`` finding, then returns ``None``. Caller is
      responsible for mirroring its own control flow (``return``/``continue``).
    * When ``emit_finding is None`` (unit tests, utility helpers): no
      side-effects on error; simply returns ``None``.

    Args:
        source: Python source text to parse.
        rel_path: Path used in the finding and as the fallback parse filename.
        emit_finding: Optional sink that receives the meta finding.
        emitting_gate: Optional gate name recorded in the finding summary.
        filename: Optional filename passed to ``ast.parse``; defaults to
            ``rel_path`` and then ``"<unknown>"``.
    """
    if not source:
        return None
    try:
        return ast.parse(source, filename=filename or rel_path or "<unknown>")
    except SyntaxError as exc:
        parse_error = exc
    except ValueError as exc:
        # Report unparseable-but-not-SyntaxError input through the same
        # finding instead of crashing the gate; no line number is available.
        parse_error = SyntaxError(str(exc) or "source could not be parsed")

    if emit_finding is not None and _looks_like_python_path(rel_path):
        try:
            emit_finding(
                build_syntax_parse_error_finding(
                    rel_path=rel_path,
                    exc=parse_error,
                    emitting_gate=emitting_gate,
                )
            )
        except Exception:  # noqa: BLE001 -- never crash a gate on emit failure
            pass
    return None
271
+
272
+
273
def _collect_impl(source: str) -> frozenset[int]:
    """Parse ``source`` and gather the interior lines of multi-line strings.

    Returns an empty frozenset when ``ast.parse`` raises ``SyntaxError`` or
    ``ValueError``.
    """
    try:
        module = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError catches things like source containing a null byte.
        return frozenset()

    interior: set[int] = set()
    for node in ast.walk(module):
        # Plain string constants (including docstrings and string forward
        # refs) and f-strings are handled by the same interior-only rule.
        is_plain_str = isinstance(node, ast.Constant) and isinstance(node.value, str)
        if not (is_plain_str or isinstance(node, ast.JoinedStr)):
            continue
        first = getattr(node, "lineno", None)
        last = getattr(node, "end_lineno", None)
        if first is None or last is None:
            continue
        # Only lines strictly between the opening and closing line are purely
        # string content: the opening line may carry code before the quote and
        # the closing line may carry code after it. The range is empty for
        # literals spanning one or two lines.
        interior.update(range(int(first) + 1, int(last)))

    return frozenset(interior)


@lru_cache(maxsize=256)
def _collect_cached(source: str) -> frozenset[int]:
    """Memoized front for ``_collect_impl``, keyed on the full source string.

    Repeated calls with the same source reuse the previous result; the cache
    holds at most 256 entries.
    """
    return _collect_impl(source)


def collect_string_constant_line_ranges(source: str) -> frozenset[int]:
    """Return 1-based line numbers lying strictly inside multi-line strings.

    A line is reported when it falls between (not on) the first and last line
    of a string ``ast.Constant`` or an f-string (``ast.JoinedStr``). Literals
    that span one or two lines therefore contribute nothing.

    Returns ``frozenset()`` if ``source`` is empty or is not valid Python --
    no exclusions means unchanged prior behavior for that file.

    Intended use
    ------------
    At the top of a line-based gate runner::

        excluded = collect_string_constant_line_ranges(content)
        for i, line in enumerate(content.splitlines(), 1):
            if i in excluded:
                continue
            ...

    For regex matches on the whole ``content`` (not per-line), convert the
    match offset to a line number via ``content[:m.start()].count("\\n") + 1``
    and check that line against ``excluded``.
    """
    if not source:
        return frozenset()
    try:
        result = _collect_cached(source)
    except TypeError:
        # lru_cache needs a hashable key; fall back to an uncached parse if
        # the argument cannot be hashed.
        result = _collect_impl(source)
    return result
362
+
363
+
364
def line_is_inside_string_constant(source: str, lineno: int) -> bool:
    """Return True iff ``lineno`` is in ``collect_string_constant_line_ranges(source)``."""
    excluded_lines = collect_string_constant_line_ranges(source)
    return lineno in excluded_lines
367
+
368
+
369
+ # ---------------------------------------------------------------------------
370
+ # F14c implementations
371
+ # ---------------------------------------------------------------------------
372
+
373
+
374
def collect_constant_container_literal_lines(source: str) -> frozenset[int]:
    """F14c sub-fix 1: return line numbers of string literals inside
    UPPER_CASE module-level tuple/list/set/frozenset/dict assignments.

    Used by text-scanning gates to skip their own marker definitions such as::

        _TECH_DEBT_MARKERS = ("TODO", "FIXME", "HACK", "XXX")

    Criteria (AST-based, conservative):
    * ``ast.Assign`` or ``ast.AnnAssign`` at module top level
    * single target: ``ast.Name`` whose ``id`` matches ``_?[A-Z][A-Z0-9_]*``
    * value is ``ast.Tuple``/``ast.List``/``ast.Set``/``ast.Dict``
      OR ``ast.Call(func=Name('frozenset'|'set'|'tuple'|'list'))``

    For each qualifying container we walk every string ``ast.Constant`` and
    add the inclusive ``lineno..end_lineno`` range to the returned frozenset.

    Sources that ``ast.parse`` rejects (``SyntaxError``, or ``ValueError`` for
    input such as a null byte) return an empty frozenset (fail-open to avoid
    suppressing real findings on broken files).
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        return frozenset()

    container_factories = ("frozenset", "set", "tuple", "list")

    def _is_container_literal(value: ast.AST) -> bool:
        """True for a display literal or a call to a builtin container name."""
        if isinstance(value, (ast.Tuple, ast.List, ast.Set, ast.Dict)):
            return True
        return (
            isinstance(value, ast.Call)
            and isinstance(value.func, ast.Name)
            and value.func.id in container_factories
        )

    out: set[int] = set()
    for node in ast.iter_child_nodes(tree):
        # Normalize both assignment forms to a single (target, value) pair.
        if isinstance(node, ast.Assign):
            if len(node.targets) != 1:
                continue
            target, value = node.targets[0], node.value
        elif isinstance(node, ast.AnnAssign):
            target, value = node.target, node.value
        else:
            continue

        if not isinstance(target, ast.Name) or not _UPPER_NAME_RE.match(target.id):
            continue
        # A bare annotation (``NAME: T``) has no value to inspect.
        if value is None or not _is_container_literal(value):
            continue

        for sub in ast.walk(value):
            if isinstance(sub, ast.Constant) and isinstance(sub.value, str):
                start = int(getattr(sub, "lineno", 0) or 0)
                if start <= 0:
                    continue
                end = int(getattr(sub, "end_lineno", start) or start)
                out.update(range(start, end + 1))

    return frozenset(out)
443
+
444
+
445
def collect_print_call_line_nums(source: str) -> frozenset[int]:
    """Return 1-based line numbers of genuine Python ``print(...)`` CALLS.

    Precision aid for ``debug_print_scan``: a textual match on ``print(`` also
    hits the token inside string literals and attribute calls such as
    ``self.printer.print(...)``. Here only ``ast.Call`` nodes whose ``func``
    is the bare name ``print`` are kept.

    Each call is reported at the line of the ``print`` name token
    (``func.lineno``, falling back to the call's own ``lineno``), so a
    multi-line call is attributed to its opening line.

    Sources that fail to parse (``SyntaxError`` / ``ValueError``) yield an
    empty frozenset (fail-open).
    """
    try:
        module = ast.parse(source)
    except (SyntaxError, ValueError):
        return frozenset()

    hits: set[int] = set()
    for call in ast.walk(module):
        if not isinstance(call, ast.Call):
            continue
        callee = call.func
        # ``obj.print`` / ``mod.print`` are ast.Attribute, not ast.Name.
        if not (isinstance(callee, ast.Name) and callee.id == "print"):
            continue
        line = getattr(callee, "lineno", None) or getattr(call, "lineno", None)
        if line:
            hits.add(int(line))
    return frozenset(hits)
478
+
479
+
480
def collect_cli_output_func_line_ranges(source: str) -> list[tuple[int, int]]:
    """Return inclusive 1-based ``(start, end)`` line ranges of functions that
    are conventionally user-facing CLI/output surfaces, where ``print()`` is
    intentional rather than a stray debug statement.

    A function (sync or async, at any nesting depth) qualifies when its name
    * starts with ``print_`` or ``_print_``, OR
    * is listed in ``_CLI_FUNC_NAMES``.

    Only the qualifying function's own line range is returned, so it does not
    cover ``print()`` calls in unrelated functions of the same file.

    Sources that fail to parse (``SyntaxError`` / ``ValueError``) yield an
    empty list (fail-open).
    """
    try:
        module = ast.parse(source)
    except (SyntaxError, ValueError):
        return []

    output_prefixes = ("print_", "_print_")
    spans: list[tuple[int, int]] = []
    for fn in ast.walk(module):
        if not isinstance(fn, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if not (fn.name.startswith(output_prefixes) or fn.name in _CLI_FUNC_NAMES):
            continue
        first = int(getattr(fn, "lineno", 0) or 0)
        if first <= 0:
            continue
        # Fall back to a one-line range when no end line is recorded.
        last = int(getattr(fn, "end_lineno", first) or first)
        spans.append((first, last))
    return spans
519
+
520
+
521
def is_section_header_comment(line: str) -> bool:
    """F14c sub-fix 2: tell whether ``line`` is a visual section-separator
    comment, as recognized by ``_SECTION_HEADER_COMMENT_RE``.

    Matches::

        # --- section ---
        # === Legacy Debt (C53) ===
        # -- legacy_debt (C53) --
        # ----- DEBUG -----
        # end ---

    Regular prose comments (``# this is a normal comment.``) do NOT match.
    Empty input is never a header.
    """
    return bool(line) and _SECTION_HEADER_COMMENT_RE.match(line) is not None
538
+
539
+
540
def is_cli_surface_file(file_path: str) -> bool:
    """F14c sub-fix 3: return True if ``file_path`` is a user-facing CLI
    surface where ``print()`` is legitimate.

    A path qualifies when, after normalizing backslashes to ``/`` and
    dropping leading ``./`` segments, any of the following holds at a path
    component boundary:

    * it equals, or ends with ``/`` plus, an entry of
      ``_CLI_SURFACE_FILE_EXACT``;
    * it contains a directory listed in ``_CLI_SURFACE_FILE_PREFIXES``
      (e.g. anything under ``INTERFACE/cli/``), at the start of the path or
      nested below other directories;
    * its file name matches an entry of ``_CLI_SURFACE_FILE_SUFFIXES``
      (e.g. ``<feature>/cli_entry.py``).

    Empty paths are never CLI surfaces.
    """
    if not file_path:
        return False

    normalized = file_path.replace("\\", "/")
    # Drop only whole "./" segments. ``str.lstrip("./")`` would strip any run
    # of leading "." and "/" characters and so mangle names like "..x.py".
    while normalized.startswith("./"):
        normalized = normalized[2:]
    # Anchor with a single leading "/" so every check below can demand a
    # path-component boundary with one uniform test.
    anchored = "/" + normalized.lstrip("/")

    if any(anchored.endswith("/" + hub) for hub in _CLI_SURFACE_FILE_EXACT):
        return True
    # Boundary-aware containment: "NOT_INTERFACE/cli/" must not match the
    # "INTERFACE/cli/" entry.
    if any(("/" + prefix) in anchored for prefix in _CLI_SURFACE_FILE_PREFIXES):
        return True
    return any(
        anchored.endswith("/" + suffix.lstrip("/"))
        for suffix in _CLI_SURFACE_FILE_SUFFIXES
    )
563
+
564
+
565
def collect_main_block_line_ranges(source: str) -> list[tuple[int, int]]:
    """F14c sub-fix 3: return inclusive line ranges covered by
    ``if __name__ == "__main__":`` blocks at module top level, plus
    top-level CLI entrypoint functions (names in ``_CLI_FUNC_NAMES`` or
    starting with ``_cli_``).

    ``print()`` inside any of these ranges is legitimate CLI output.

    Only direct children of the module are inspected; nested functions and
    guards are ignored. The guard is recognized with either operand order
    (``__name__ == "__main__"`` or ``"__main__" == __name__``).

    Fail-open: sources that ``ast.parse`` rejects (``SyntaxError``, or
    ``ValueError`` for input such as a null byte) return ``[]``.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        return []

    def _is_dunder_name(n: ast.AST) -> bool:
        return isinstance(n, ast.Name) and n.id == "__name__"

    def _is_main_const(n: ast.AST) -> bool:
        return isinstance(n, ast.Constant) and n.value == "__main__"

    def _is_main_guard(node: ast.AST) -> bool:
        """True for an ``if`` whose test is a single ``==`` main-guard compare."""
        if not isinstance(node, ast.If):
            return False
        test = node.test
        if not isinstance(test, ast.Compare):
            return False
        if len(test.ops) != 1 or not isinstance(test.ops[0], ast.Eq):
            return False
        left, right = test.left, test.comparators[0]
        return (_is_dunder_name(left) and _is_main_const(right)) or (
            _is_dunder_name(right) and _is_main_const(left)
        )

    def _is_cli_entrypoint(node: ast.AST) -> bool:
        """True for a function whose name marks it as a CLI entrypoint."""
        return isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and (
            node.name in _CLI_FUNC_NAMES or node.name.startswith("_cli_")
        )

    ranges: list[tuple[int, int]] = []
    for node in ast.iter_child_nodes(tree):
        if not (_is_main_guard(node) or _is_cli_entrypoint(node)):
            continue
        start = int(getattr(node, "lineno", 0) or 0)
        if start <= 0:
            continue
        # Fall back to a one-line range when no end line is recorded.
        end = int(getattr(node, "end_lineno", start) or start)
        ranges.append((start, end))

    return ranges
617
+
618
+
619
def line_in_ranges(
    line_num: int,
    ranges: list[tuple[int, int]] | tuple[tuple[int, int], ...],
) -> bool:
    """F14c helper: tell whether ``line_num`` lies inside at least one of the
    inclusive ``(start, end)`` pairs in ``ranges``. An empty ``ranges`` gives
    ``False``.
    """
    return any(low <= line_num <= high for low, high in ranges)