vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
"""AST-based helpers shared across line-based forensic gates.
|
|
2
|
+
|
|
3
|
+
Motivation (F14a, 2026-04-23)
|
|
4
|
+
----------------------------
|
|
5
|
+
Several "AST-sounding" gates (``test_quality_scan``, ``dead_code_scan``,
|
|
6
|
+
``unreachable_scan``) are implemented as line-based regex scans over
|
|
7
|
+
``content.splitlines()``. Those scans cannot distinguish between real Python
|
|
8
|
+
source and source that appears *inside a string literal* (test fixtures,
|
|
9
|
+
embedded code examples in docstrings, scripted-generation tests, etc.).
|
|
10
|
+
|
|
11
|
+
Example false positive
|
|
12
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
|
13
|
+
Inside a test file::
|
|
14
|
+
|
|
15
|
+
FIXTURE_CODE = '''
|
|
16
|
+
def func_c():
|
|
17
|
+
return x
|
|
18
|
+
dead_line()
|
|
19
|
+
'''
|
|
20
|
+
|
|
21
|
+
The line-based ``unreachable_scan`` regex saw ``return x`` on one line
|
|
22
|
+
followed by statements on the next line at the same indent and flagged
|
|
23
|
+
the fixture body as unreachable code. But those bytes are a string literal,
|
|
24
|
+
not real code.
|
|
25
|
+
|
|
26
|
+
Fix shape
|
|
27
|
+
~~~~~~~~~
|
|
28
|
+
``collect_string_constant_line_ranges(source)`` parses the source once via
|
|
29
|
+
``ast.parse`` and returns the set of 1-based line numbers that are covered
|
|
30
|
+
by the interior of a multi-line string ``Constant`` or ``JoinedStr`` (f-string). Line-based
|
|
31
|
+
gates then skip matches whose line is in this set.
|
|
32
|
+
|
|
33
|
+
The helper is AST-only — no regex over source text — and degrades gracefully
|
|
34
|
+
to an empty ``frozenset()`` when ``ast.parse`` raises ``SyntaxError``, which
|
|
35
|
+
preserves prior gate behavior for unparseable files. Non-Python files are
|
|
36
|
+
expected to be rejected upstream by ``detect_language(...) != "python"``;
|
|
37
|
+
calling this helper with non-Python source is not an error but will almost
|
|
38
|
+
certainly fail to parse and produce an empty result (safe default).
|
|
39
|
+
|
|
40
|
+
The helper is **intentionally** conservative in what it excludes:
|
|
41
|
+
|
|
42
|
+
* Only the lines spanned by ``ast.Constant(value=str)`` and ``ast.JoinedStr``.
|
|
43
|
+
* Pure ``ast.Expr`` docstrings appear as ``Constant(str)`` already.
|
|
44
|
+
* Byte strings (``b"..."``) are ``Constant(value=bytes)`` and are NOT
|
|
45
|
+
excluded — they cannot host Python source interpretation anyway.
|
|
46
|
+
* String-typed *annotations* (forward refs) are string constants but they
|
|
47
|
+
span a single token; excluding them causes no false negatives because
|
|
48
|
+
they do not contain statement-level code.
|
|
49
|
+
|
|
50
|
+
No reachable code ever lives inside a ``Constant(str)`` or ``JoinedStr``,
|
|
51
|
+
so false-negative risk is zero by construction.
|
|
52
|
+
"""
|
|
53
|
+
from __future__ import annotations
|
|
54
|
+
|
|
55
|
+
import ast
|
|
56
|
+
import hashlib
|
|
57
|
+
import re
|
|
58
|
+
from functools import lru_cache
|
|
59
|
+
from typing import Callable, Optional
|
|
60
|
+
|
|
61
|
+
from vigil_forensic._shared import (
|
|
62
|
+
EvidenceReference,
|
|
63
|
+
GateCategory,
|
|
64
|
+
GateFinding,
|
|
65
|
+
GateImpact,
|
|
66
|
+
GateSeverity,
|
|
67
|
+
RepairKind,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# F14c: Detector self-match suppression helpers
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Shared by text-scanning gates (todo_scan,
|
|
75
|
+
# legacy_compat_debt.stale_migration_marker, debug_print_scan) to avoid
|
|
76
|
+
# "detector self-match" false positives where a gate finds its own pattern
|
|
77
|
+
# definitions in its own source.
|
|
78
|
+
|
|
79
|
+
# Names that look like module constants: optional leading underscore, then
# an upper-case letter followed by upper-case letters, digits or underscores.
_UPPER_NAME_RE = re.compile(r"^_?[A-Z][A-Z0-9_]*$")

# A comment line used as a visual section separator:
#   # --- section ---
#   # === Legacy Debt (C53) ===
#   # ----- DEBUG -----
#   # -- legacy_debt (C53) --
# Regular prose comments never match.
# NOTE(review): the second alternative accepts any comment that merely ENDS
# in two or more ``-``/``=`` characters, so prose with a trailing ``--``
# would also match -- confirm that is acceptable.
_SECTION_HEADER_COMMENT_RE = re.compile(
    r"""
    ^\s*\#\s*
    (?:
        (?:[-=]{2,})\s*\S.*?\s*(?:[-=]{2,})?
        |
        \S.*?\s*[-=]{2,}
    )
    \s*$
    """,
    re.VERBOSE,
)

# F14c sub-fix 3: files where ``print()`` is legitimate CLI output.
# Directory fragments consulted by ``is_cli_surface_file``.
_CLI_SURFACE_FILE_PREFIXES: tuple[str, ...] = (
    "INTERFACE/cli/",
)

# Individual files (forward-slash relative paths) treated as CLI surfaces.
_CLI_SURFACE_FILE_EXACT: frozenset[str] = frozenset({
    "BRAIN/autoforensics/self_audit.py",
    "BRAIN/autoforensics/cli_forensic_audit.py",
    # Protocol-layer output helper — safe_print() wraps print(); flagging its
    # own implementation is a false positive.
    "SYSTEM/execution/pocketcoder_command.py",
    # CLI dispatch entry point — cmd_list() renders project table to stdout;
    # this is user-facing output, not a debug print.
    "SYSTEM/runtime/app.py",
    # Test runner utility — progress banners printed to stdout for operator
    # visibility; not production code.
    "SYSTEM/dev/tests/run_all_stress_tests.py",
    # Map Builder CLI entry — cmd_map_invariants() + _print_reports() emit
    # invariant results to stdout; Category D user-facing output.
    "BRAIN/autoforensics/map_builder/invariant_suite.py",
})

# Filename suffixes that mark a file as a user-facing CLI entrypoint.
# Convention: ``<feature>/cli_entry.py`` exposes a ``cmd_*`` dispatcher for
# the Vigil app parser and prints human-readable progress/status.
_CLI_SURFACE_FILE_SUFFIXES: tuple[str, ...] = (
    "/cli_entry.py",
)

# Function names treated as CLI entrypoints by the range collectors below.
_CLI_FUNC_NAMES: frozenset[str] = frozenset({
    "main", "_main", "cli_main", "_cli_main", "run", "cli", "_cli",
})
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Names exported by ``from <this module> import *``; underscore-prefixed
# helpers and constants in this file are intentionally left out.
__all__ = [
    "collect_string_constant_line_ranges",
    "line_is_inside_string_constant",
    "collect_constant_container_literal_lines",
    "collect_print_call_line_nums",
    "collect_cli_output_func_line_ranges",
    "is_section_header_comment",
    "is_cli_surface_file",
    "collect_main_block_line_ranges",
    "line_in_ranges",
    "parse_python_source_or_emit_finding",
    "build_syntax_parse_error_finding",
]
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# B2 (2026-04-23) -- defensive meta-integrity
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
# Rationale
|
|
153
|
+
# Historically every "AST gate" in autoforensics opened with:
|
|
154
|
+
# try:
|
|
155
|
+
# tree = ast.parse(source)
|
|
156
|
+
# except SyntaxError:
|
|
157
|
+
# return [] # or ``continue``
|
|
158
|
+
# A real SyntaxError in production code is a REAL BUG, but that try/except
|
|
159
|
+
# made the gate *blind* -- zero findings emitted => file looks clean.
|
|
160
|
+
#
|
|
161
|
+
# Fix shape
|
|
162
|
+
# ``parse_python_source_or_emit_finding`` is a drop-in replacement for the
|
|
163
|
+
# silent try/except. On SyntaxError it calls the caller-supplied
|
|
164
|
+
# ``emit_finding`` hook with a ``meta.syntax_parse_error`` finding, then
|
|
165
|
+
# returns ``None`` so the caller preserves its own control flow.
|
|
166
|
+
#
|
|
167
|
+
# Do NOT use this for helpers that are BY DESIGN syntax-tolerant
|
|
168
|
+
# (``collect_string_constant_line_ranges``, ``collect_main_block_line_ranges``,
|
|
169
|
+
# ``collect_constant_container_literal_lines``) -- those fall back to empty
|
|
170
|
+
# results on purpose and must remain silent.
|
|
171
|
+
|
|
172
|
+
_PYTHON_EXTENSIONS: frozenset[str] = frozenset({".py", ".pyi"})
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _looks_like_python_path(rel_path: str) -> bool:
|
|
176
|
+
"""True iff ``rel_path`` looks like a Python source file by extension."""
|
|
177
|
+
if not rel_path:
|
|
178
|
+
return False
|
|
179
|
+
normalized = rel_path.replace("\\", "/").lower()
|
|
180
|
+
dot = normalized.rfind(".")
|
|
181
|
+
if dot < 0:
|
|
182
|
+
return False
|
|
183
|
+
return normalized[dot:] in _PYTHON_EXTENSIONS
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def build_syntax_parse_error_finding(
    *,
    rel_path: str,
    exc: SyntaxError,
    emitting_gate: str = "",
) -> GateFinding:
    """Construct the canonical ``meta.syntax_parse_error`` GateFinding.

    Separated from :func:`parse_python_source_or_emit_finding` so tests can
    assert shape without spinning up a parser.

    Args:
        rel_path: Path of the unparseable file, used in the title, summary,
            evidence and fingerprint.
        exc: The parse error; only ``exc.lineno`` and ``exc.msg`` are read.
        emitting_gate: Optional gate name appended to the summary as
            ``[emitted by <gate>]``.

    Returns:
        A ``GateFinding`` with ``check_id="meta.syntax_parse_error"``.
    """
    line_info = f"line {exc.lineno}" if exc.lineno else "unknown line"
    msg = str(exc.msg) if exc.msg else "unknown parse error"
    # Only append the line number when there is one; otherwise the summary
    # would read "<path>:None: ...".
    location = f"{rel_path}:{exc.lineno}" if exc.lineno else rel_path
    evidence = (
        EvidenceReference(
            kind="syntax_error",
            path=rel_path,
            detail=f"{line_info}: {msg}"[:512],
        ),
    )
    # Deterministic fingerprint over (path, line): when two gates each emit
    # meta.syntax_parse_error for the same file and line they share a
    # fingerprint, so self-audit dedup can collapse them downstream.
    fp_source = f"meta.syntax_parse_error|{rel_path}|{exc.lineno}"
    fingerprint = hashlib.sha256(fp_source.encode("utf-8")).hexdigest()[:16]
    emitter_tag = f" [emitted by {emitting_gate}]" if emitting_gate else ""
    return GateFinding(
        check_id="meta.syntax_parse_error",
        category=GateCategory.META,
        title=f"Python syntax error in {rel_path} ({line_info})",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"{location}: {msg}. Autoforensics gate could not "
            f"parse this file and skipped its checks for this path.{emitter_tag}"
        ),
        recommendation=(
            "Fix the Python syntax error so gates can parse and audit this "
            "file. A silent skip hides real bugs from the audit."
        ),
        evidence=evidence,
        fingerprint=fingerprint,
        repair_kind=RepairKind.FIX_SYNTAX.value,
        executor_action="fix Python syntax error",
        proof_required="ast.parse succeeds on the file",
        allowlist_allowed=False,
        preferred_fix_shape="restore valid Python grammar; do not silence via except",
    )
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def parse_python_source_or_emit_finding(
    source: str,
    *,
    rel_path: str,
    emit_finding: Optional[Callable[[GateFinding], None]] = None,
    emitting_gate: str = "",
    filename: str | None = None,
) -> ast.Module | None:
    """Parse Python source and return the AST module, or emit a meta finding.

    Behavior:
    * On success: returns the ``ast.Module``.
    * On empty ``source``: returns ``None`` without parsing.
    * On ``SyntaxError`` -- or ``ValueError``, which ``ast.parse`` raises for
      some unparseable input such as a null byte on some interpreter
      versions: if ``emit_finding`` was provided and ``rel_path`` has a
      Python extension, calls it with a ``meta.syntax_parse_error`` finding,
      then returns ``None``. Caller is responsible for mirroring its own
      control flow (``return``/``continue``).
    * When ``emit_finding is None`` (unit tests, utility helpers): no
      side-effects on error; simply returns ``None``.

    Args:
        source: Python source text.
        rel_path: Path reported in the finding; also the fallback filename.
        emit_finding: Optional sink that receives the meta finding.
        emitting_gate: Optional gate name recorded in the finding summary.
        filename: Optional filename handed to ``ast.parse``.
    """
    if not source:
        return None
    try:
        return ast.parse(source, filename=filename or rel_path or "<unknown>")
    except SyntaxError as exc:
        parse_error = exc
    except ValueError as exc:
        # Treat it like a syntax error so the file is reported instead of the
        # gate crashing; the wrapper carries the message but no line number.
        parse_error = SyntaxError(str(exc) or "unparseable source")

    if emit_finding is None or not _looks_like_python_path(rel_path):
        return None
    try:
        emit_finding(
            build_syntax_parse_error_finding(
                rel_path=rel_path,
                exc=parse_error,
                emitting_gate=emitting_gate,
            )
        )
    except Exception:  # noqa: BLE001 -- never crash a gate on emit failure
        pass
    return None
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _collect_impl(source: str) -> frozenset[int]:
|
|
274
|
+
try:
|
|
275
|
+
tree = ast.parse(source)
|
|
276
|
+
except (SyntaxError, ValueError):
|
|
277
|
+
# ValueError catches things like source containing a null byte.
|
|
278
|
+
return frozenset()
|
|
279
|
+
|
|
280
|
+
lines: set[int] = set()
|
|
281
|
+
for node in ast.walk(tree):
|
|
282
|
+
# ast.Constant(value=str) — covers plain "..."/'...', triple-quoted
|
|
283
|
+
# """...""", docstrings, and string-type forward refs.
|
|
284
|
+
if isinstance(node, ast.Constant) and isinstance(node.value, str):
|
|
285
|
+
start = getattr(node, "lineno", None)
|
|
286
|
+
end = getattr(node, "end_lineno", None)
|
|
287
|
+
if start is None or end is None:
|
|
288
|
+
continue
|
|
289
|
+
start = int(start)
|
|
290
|
+
end = int(end)
|
|
291
|
+
# Only the *interior* lines of a multi-line string are purely
|
|
292
|
+
# inside the literal. The opening line (``x = '''``) and the
|
|
293
|
+
# closing line (``'''``) may carry real code before the opening
|
|
294
|
+
# quote or after the closing quote (e.g. ``raise ValueError("bad")``
|
|
295
|
+
# is a single-line string on a real statement line). A single-line
|
|
296
|
+
# string (start == end) contributes no excluded line.
|
|
297
|
+
if end - start < 2:
|
|
298
|
+
continue
|
|
299
|
+
for ln in range(start + 1, end):
|
|
300
|
+
lines.add(ln)
|
|
301
|
+
continue
|
|
302
|
+
|
|
303
|
+
# ast.JoinedStr — f-strings. Same interior-only rule: only middle
|
|
304
|
+
# lines of a multi-line f-string are purely string content.
|
|
305
|
+
if isinstance(node, ast.JoinedStr):
|
|
306
|
+
start = getattr(node, "lineno", None)
|
|
307
|
+
end = getattr(node, "end_lineno", None)
|
|
308
|
+
if start is None or end is None:
|
|
309
|
+
continue
|
|
310
|
+
start = int(start)
|
|
311
|
+
end = int(end)
|
|
312
|
+
if end - start < 2:
|
|
313
|
+
continue
|
|
314
|
+
for ln in range(start + 1, end):
|
|
315
|
+
lines.add(ln)
|
|
316
|
+
continue
|
|
317
|
+
|
|
318
|
+
return frozenset(lines)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@lru_cache(maxsize=256)
def _collect_cached(source: str) -> frozenset[int]:
    """Memoised front for :func:`_collect_impl`.

    The cache key is the complete source text, so several passes over the
    same file content trigger a single parse. At most 256 results are
    retained, which bounds the memory held by the cache.
    """
    excluded_lines = _collect_impl(source)
    return excluded_lines
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def collect_string_constant_line_ranges(source: str) -> frozenset[int]:
    """Return 1-based line numbers lying strictly inside a string literal.

    Literal kinds considered:
    * single-quoted / double-quoted string constants,
    * triple-quoted string constants (docstrings and plain literals),
    * f-strings (``ast.JoinedStr``).

    Only the lines *between* a literal's opening and closing line are
    reported, so single-line literals never exclude anything.

    An empty ``source``, or one that is not valid Python, yields
    ``frozenset()`` — no exclusions, i.e. the calling gate behaves as it did
    before this helper existed.

    Intended use
    ------------
    At the top of a line-based gate runner::

        excluded = collect_string_constant_line_ranges(content)
        for i, line in enumerate(content.splitlines(), 1):
            if i in excluded:
                continue
            ...

    For regex matches on the whole ``content`` (not per-line), convert the
    match offset to a line number via ``content[:m.start()].count("\\n") + 1``
    and check that line against ``excluded``.
    """
    if source:
        try:
            return _collect_cached(source)
        except TypeError:
            # The cache needs a hashable key; a ``str`` always is, so this
            # path is purely defensive and bypasses the cache.
            return _collect_impl(source)
    return frozenset()
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def line_is_inside_string_constant(source: str, lineno: int) -> bool:
    """Shorthand membership test against ``collect_string_constant_line_ranges``.

    True iff ``lineno`` (1-based) is among the excluded lines computed for
    ``source``.
    """
    excluded = collect_string_constant_line_ranges(source)
    return lineno in excluded
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
# ---------------------------------------------------------------------------
|
|
370
|
+
# F14c implementations
|
|
371
|
+
# ---------------------------------------------------------------------------
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def collect_constant_container_literal_lines(source: str) -> frozenset[int]:
    """F14c sub-fix 1: return line numbers of string literals inside
    UPPER_CASE module-level tuple/list/set/frozenset/dict assignments.

    Used by text-scanning gates to skip their own marker definitions such as::

        _TECH_DEBT_MARKERS = ("TODO", "FIXME", "HACK", "XXX")

    Criteria (AST-based, conservative):
    * ``ast.Assign`` or ``ast.AnnAssign`` at module top level
    * single target: ``ast.Name`` whose ``id`` matches ``_?[A-Z][A-Z0-9_]*``
    * value is ``ast.Tuple``/``ast.List``/``ast.Set``/``ast.Dict``
      OR ``ast.Call(func=Name('frozenset'|'set'|'tuple'|'list'))``

    For each qualifying container we walk every string ``ast.Constant`` and
    add the inclusive ``lineno..end_lineno`` range to the returned frozenset.

    Sources that ``ast.parse`` rejects -- with ``SyntaxError`` or with
    ``ValueError`` (e.g. a null byte) -- return an empty frozenset (fail-open
    to avoid suppressing real findings on broken files).
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        return frozenset()

    def _string_literal_lines(value: ast.AST) -> set[int]:
        # Inclusive line span of every str constant anywhere under ``value``.
        covered: set[int] = set()
        for sub in ast.walk(value):
            if isinstance(sub, ast.Constant) and isinstance(sub.value, str):
                start = int(getattr(sub, "lineno", 0) or 0)
                end = int(getattr(sub, "end_lineno", start) or start)
                if start > 0:
                    covered.update(range(start, end + 1))
        return covered

    def _is_container_literal(value: ast.AST) -> bool:
        if isinstance(value, (ast.Tuple, ast.List, ast.Set, ast.Dict)):
            return True
        return (
            isinstance(value, ast.Call)
            and isinstance(value.func, ast.Name)
            and value.func.id in ("frozenset", "set", "tuple", "list")
        )

    out: set[int] = set()
    # Only direct children of the module: nested assignments never qualify.
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            if len(node.targets) != 1:
                continue
            target, value = node.targets[0], node.value
        elif isinstance(node, ast.AnnAssign):
            # A bare annotation (``X: tuple``) has no value to inspect.
            target, value = node.target, node.value
        else:
            continue
        if not isinstance(target, ast.Name) or not _UPPER_NAME_RE.match(target.id):
            continue
        if value is not None and _is_container_literal(value):
            out.update(_string_literal_lines(value))

    return frozenset(out)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def collect_print_call_line_nums(source: str) -> frozenset[int]:
    """Locate real ``print(...)`` calls and return their 1-based line numbers.

    A textual search for ``print(`` also hits the token inside string
    literals and attribute calls such as ``self.printer.print(...)``. This
    helper inspects the AST instead and keeps only ``ast.Call`` nodes whose
    callee is the bare name ``print``.

    Each call is reported at the line of the ``print`` name (falling back to
    the call node's own line), so a call spread over several lines is
    attributed to the line where it opens.

    Sources that fail to parse (``SyntaxError`` / ``ValueError``) produce an
    empty frozenset, leaving the caller with its previous regex behavior.
    """
    try:
        module = ast.parse(source)
    except (SyntaxError, ValueError):
        return frozenset()

    def _print_token_line(call: ast.Call) -> int:
        # 0 means "not a bare print call" or "no usable line number".
        callee = call.func
        if not (isinstance(callee, ast.Name) and callee.id == "print"):
            return 0
        return int(getattr(callee, "lineno", None) or getattr(call, "lineno", None) or 0)

    candidates = (
        _print_token_line(found)
        for found in ast.walk(module)
        if isinstance(found, ast.Call)
    )
    return frozenset(line for line in candidates if line)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def collect_cli_output_func_line_ranges(source: str) -> list[tuple[int, int]]:
    """Collect inclusive 1-based ``(start, end)`` spans of output functions.

    A function (sync or async, at any nesting depth) qualifies when its name
    * begins with ``print_`` or ``_print_`` (``print_human_summary``,
      ``_print_reports``), or
    * is listed in ``_CLI_FUNC_NAMES`` (``main`` / ``cli`` / ``run`` /
      ``cli_main`` and their underscore variants).

    The rule is deliberately narrow: only the span of the matching function
    itself is returned, so a ``print_*`` helper does not excuse a stray
    ``print()`` in some other function of the same file.

    Sources that fail to parse produce an empty list (fail-open).
    """
    try:
        module = ast.parse(source)
    except (SyntaxError, ValueError):
        return []

    spans: list[tuple[int, int]] = []
    for candidate in ast.walk(module):
        if isinstance(candidate, (ast.FunctionDef, ast.AsyncFunctionDef)):
            fn_name = candidate.name
            if fn_name.startswith(("print_", "_print_")) or fn_name in _CLI_FUNC_NAMES:
                first = int(getattr(candidate, "lineno", 0) or 0)
                if first > 0:
                    last = int(getattr(candidate, "end_lineno", first) or first)
                    spans.append((first, last))
    return spans
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def is_section_header_comment(line: str) -> bool:
    """F14c sub-fix 2: decide whether ``line`` is a decorative separator comment.

    Examples that match::

        # --- section ---
        # === Legacy Debt (C53) ===
        # -- legacy_debt (C53) --
        # ----- DEBUG -----
        # end ---

    An ordinary prose comment (``# this is a normal comment.``) does not
    match, and neither does an empty line.
    """
    return bool(line) and _SECTION_HEADER_COMMENT_RE.match(line) is not None
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def is_cli_surface_file(file_path: str) -> bool:
    """F14c sub-fix 3: return True if ``file_path`` is a user-facing CLI
    surface where ``print()`` is legitimate.

    The path is normalized to forward slashes and leading ``./`` segments and
    root slashes are dropped (as prefixes, not as a character set, so
    ``../x`` and dot-files keep their names). It then matches when:

    * it equals, or ends with ``/`` plus, an entry of
      ``_CLI_SURFACE_FILE_EXACT``;
    * it contains an entry of ``_CLI_SURFACE_FILE_PREFIXES`` starting at a
      path-component boundary (e.g. anything under ``INTERFACE/cli/``, but
      not ``NOTINTERFACE/cli/``);
    * its file name matches an entry of ``_CLI_SURFACE_FILE_SUFFIXES``
      (e.g. ``<feature>/cli_entry.py``).

    Returns ``False`` for an empty path.
    """
    if not file_path:
        return False
    normalized = file_path.replace("\\", "/")
    # Drop one leading "./" or "/" per pass; ``str.lstrip("./")`` would strip
    # any run of dots and slashes and so corrupt "../x" or ".hidden/x".
    while normalized.startswith(("./", "/")):
        normalized = normalized.partition("/")[2]

    if any(
        normalized == hub or normalized.endswith("/" + hub)
        for hub in _CLI_SURFACE_FILE_EXACT
    ):
        return True
    # Anchor the directory fragment at a component boundary so a directory
    # whose name merely ends with the fragment's first segment is not matched.
    if any(
        normalized.startswith(prefix) or ("/" + prefix) in normalized
        for prefix in _CLI_SURFACE_FILE_PREFIXES
    ):
        return True
    return any(
        normalized.endswith(suffix) or normalized == suffix.lstrip("/")
        for suffix in _CLI_SURFACE_FILE_SUFFIXES
    )
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def collect_main_block_line_ranges(source: str) -> list[tuple[int, int]]:
    """F14c sub-fix 3: return inclusive line ranges covered by
    ``if __name__ == "__main__":`` blocks at module top level, plus
    conventionally-named CLI entrypoint functions (``main``, ``cli_main``,
    ``run``, ``_cli_*`` etc.).

    ``print()`` inside any of these ranges is legitimate CLI output.

    Only direct children of the module are inspected. The main guard is
    recognized with the operands in either order
    (``"__main__" == __name__`` also counts).

    Fail-open: sources that ``ast.parse`` rejects with ``SyntaxError`` or
    ``ValueError`` (e.g. a null byte) return ``[]``.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        return []

    def _is_dunder_name(n: ast.AST) -> bool:
        return isinstance(n, ast.Name) and n.id == "__name__"

    def _is_main_const(n: ast.AST) -> bool:
        return isinstance(n, ast.Constant) and n.value == "__main__"

    def _is_main_guard(node: ast.AST) -> bool:
        if not isinstance(node, ast.If):
            return False
        test = node.test
        if not isinstance(test, ast.Compare):
            return False
        # Exactly one ``==``; chained comparisons are not a main guard.
        if len(test.ops) != 1 or not isinstance(test.ops[0], ast.Eq):
            return False
        left, right = test.left, test.comparators[0]
        return (_is_dunder_name(left) and _is_main_const(right)) or (
            _is_dunder_name(right) and _is_main_const(left)
        )

    def _is_cli_entrypoint(node: ast.AST) -> bool:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return False
        return node.name in _CLI_FUNC_NAMES or node.name.startswith("_cli_")

    ranges: list[tuple[int, int]] = []
    for node in ast.iter_child_nodes(tree):
        if not (_is_main_guard(node) or _is_cli_entrypoint(node)):
            continue
        start = int(getattr(node, "lineno", 0) or 0)
        end = int(getattr(node, "end_lineno", start) or start)
        if start > 0:
            ranges.append((start, end))

    return ranges
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def line_in_ranges(
    line_num: int,
    ranges: list[tuple[int, int]] | tuple[tuple[int, int], ...],
) -> bool:
    """F14c helper: test ``line_num`` against a collection of line spans.

    Each element of ``ranges`` is a ``(start, end)`` pair with both ends
    inclusive. Returns True as soon as one span contains ``line_num`` and
    False when none does (including when ``ranges`` is empty).
    """
    return any(low <= line_num <= high for low, high in ranges)
|