vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Allowlist writer — programmatic Python API to append FP entries.
|
|
2
|
+
|
|
3
|
+
Sprint D1 (2026-04-23). Previously only the executor could write
|
|
4
|
+
`false_positive_allowlist.json` via file tool calls. This module adds a
|
|
5
|
+
Python writer so the PE supervisor pipeline can persist FP classifications
|
|
6
|
+
without going through a file-edit round-trip.
|
|
7
|
+
|
|
8
|
+
Contract
|
|
9
|
+
--------
|
|
10
|
+
* Only findings with `applicability="unknown"` are eligible. Any entry whose
|
|
11
|
+
`finding_snapshot.applicability` differs raises ValueError (fail-loud).
|
|
12
|
+
* Callers on the PE path must set `classifier="pe_supervisor"`. Classifier
|
|
13
|
+
values outside `executor` / `pe_supervisor` / `human` raise ValueError at this entrypoint.
|
|
14
|
+
* Merge by fingerprint: an existing entry with the same fingerprint is
|
|
15
|
+
overwritten (latest classifier wins) — idempotent.
|
|
16
|
+
* Atomic file write (tempfile.mkstemp + os.replace) — same pattern as
|
|
17
|
+
`save_allowlist()` in allowlist.py, safe against partial writes and
|
|
18
|
+
reasonably safe across concurrent sessions on the same drive.
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
import tempfile
|
|
26
|
+
import time
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from datetime import datetime, timezone
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Literal, Optional, TYPE_CHECKING
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from vigil_forensic._shared import GateFinding
|
|
34
|
+
|
|
35
|
+
_log = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
_ALLOWLIST_PATH_PARTS = (".prompt-engineer", "forensic_gates", "false_positive_allowlist.json")
|
|
38
|
+
|
|
39
|
+
_ALLOWED_CLASSIFIERS: frozenset[str] = frozenset({"executor", "pe_supervisor", "human"})
|
|
40
|
+
|
|
41
|
+
# Sprint (2026-04-24): default TTL for PE-classifier safety mechanisms.
|
|
42
|
+
DEFAULT_PE_TTL_DAYS = 30
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class FPAllowlistEntry:
    """One false-positive allowlist record handed to the programmatic writer.

    ``finding_snapshot`` is expected to carry an ``applicability`` key; the
    writer refuses to persist the entry unless that value is ``"unknown"``.
    An empty ``expires_at`` means no expiry timestamp is recorded.
    """

    fingerprint: str
    reason: str
    classifier: Literal["executor", "pe_supervisor", "human"]
    classified_at: str
    session_num: int
    finding_snapshot: dict
    evidence_type: str = "design_decision"
    expires_at: str = ""
    # Optional overrides. When left empty/zero, to_disk_dict() falls back to
    # the snapshot (check_id, file, line), the reason (evidence), the
    # classification timestamp (added_at) or the classifier (added_by).
    check_id: str = ""
    file: str = ""
    line: int = 0
    evidence: str = ""
    added_by: str = ""
    added_at: str = ""
    reviewed_by: str = ""
    # TTL + code-hash bookkeeping. ``created_at`` is epoch seconds; the
    # default 0.0 means "not stamped", in which case to_disk_dict() writes
    # the current time instead. ``code_hash`` is written through unchanged.
    created_at: float = 0.0
    ttl_days: int = DEFAULT_PE_TTL_DAYS
    code_hash: str = ""
    extra: dict = field(default_factory=dict)

    def to_disk_dict(self) -> dict[str, object]:
        """Return the flat JSON-ready mapping stored in the allowlist file.

        Keys from ``extra`` are appended only when they do not collide with
        one of the standard keys, so ``extra`` can never override a core field.
        """
        snap = dict(self.finding_snapshot)

        # A non-positive created_at means the entry was never stamped.
        stamp = self.created_at
        if not stamp > 0.0:
            stamp = time.time()

        record: dict[str, object] = {
            "fingerprint": self.fingerprint,
            "check_id": self.check_id or str(snap.get("check_id", "") or ""),
            "file": self.file or str(snap.get("file", "") or ""),
            "line": self.line or int(snap.get("line", 0) or 0),
            "reason": self.reason,
            "evidence_type": self.evidence_type,
            "evidence": self.evidence or self.reason,
            "added_by": self.added_by or self.classifier,
            "added_at": self.added_at or self.classified_at,
            "reviewed_by": self.reviewed_by,
            "expires_at": self.expires_at,
            "classifier": self.classifier,
            "classified_at": self.classified_at,
            "created_at": float(stamp),
            "ttl_days": int(self.ttl_days),
            "code_hash": self.code_hash,
            "finding_snapshot": snap,
        }
        for extra_key in self.extra:
            if extra_key not in record:
                record[extra_key] = self.extra[extra_key]
        return record
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _resolve_path(project_dir: Path) -> Path:
    """Return the allowlist JSON location beneath *project_dir*."""
    return Path(project_dir).joinpath(*_ALLOWLIST_PATH_PARTS)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _validate_entry(entry: FPAllowlistEntry) -> None:
    """Check one entry against the writer's contract.

    Raises:
        ValueError: if the classifier is not an allowed value, the
            fingerprint is empty, the stripped reason is shorter than ten
            characters, or the snapshot's applicability is not ``"unknown"``.
    """
    caller = "write_fp_allowlist_entries"

    if entry.classifier not in _ALLOWED_CLASSIFIERS:
        permitted = sorted(_ALLOWED_CLASSIFIERS)
        raise ValueError(
            f"{caller}: unsupported classifier {entry.classifier!r}; allowed: {permitted}"
        )

    if not entry.fingerprint:
        raise ValueError(f"{caller}: entry has empty fingerprint")

    trimmed_reason = (entry.reason or "").strip()
    if len(trimmed_reason) < 10:
        raise ValueError(
            f"{caller}: reason for {entry.fingerprint!r} too short (min 10 chars): {entry.reason!r}"
        )

    # A missing or falsy applicability is normalised to "" and therefore rejected.
    applicability = str(entry.finding_snapshot.get("applicability", "") or "")
    if applicability == "unknown":
        return
    raise ValueError(
        f"{caller}: finding_snapshot.applicability must be 'unknown' "
        f"for fingerprint {entry.fingerprint!r}, got {applicability!r}. "
        "Only uncertain findings are eligible for programmatic allowlist writes."
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def write_fp_allowlist_entries(
    project_dir: Path,
    entries: list[FPAllowlistEntry],
) -> int:
    """Merge *entries* into the project's allowlist file and rewrite it.

    All entries are validated before the file is touched, so a contract
    violation raises ``ValueError`` and leaves the file unchanged. Entries
    are merged by fingerprint: an incoming entry replaces any stored record
    with the same fingerprint. The merged list is written to a temporary
    file in the same directory and moved into place with ``os.replace``.

    If the existing file cannot be read or parsed, or does not hold a JSON
    array, a warning is logged and the new file contains only *entries*.

    Args:
        project_dir: Project root under which the allowlist file lives.
        entries: Entries to insert or update. An empty list is a no-op.

    Returns:
        The number of entries that were new or differed from the stored record.

    Raises:
        ValueError: if any entry fails validation.
        OSError: if the temporary file cannot be written or moved into place.
    """
    if not entries:
        return 0

    # Validate ALL entries before touching the file: fail loud, no partial writes.
    for entry in entries:
        _validate_entry(entry)

    allowlist_path = _resolve_path(project_dir)
    allowlist_path.parent.mkdir(parents=True, exist_ok=True)

    existing: list[dict] = []
    if allowlist_path.exists():
        try:
            raw = allowlist_path.read_text(encoding="utf-8")
            loaded = json.loads(raw) if raw.strip() else []
            if isinstance(loaded, list):
                existing = loaded
            else:
                _log.warning(
                    "allowlist_writer: existing allowlist is not a JSON array at %s "
                    "(got %s); ignoring existing content.",
                    allowlist_path, type(loaded).__name__,
                )
        except (OSError, json.JSONDecodeError) as exc:
            _log.warning(
                "allowlist_writer: could not read %s (%s); "
                "writing fresh with new entries only.",
                allowlist_path, exc,
            )

    # Index stored records by fingerprint; items that are not dicts or have
    # no fingerprint cannot be merged and are not carried over.
    by_fp: dict[str, dict] = {}
    for item in existing:
        if isinstance(item, dict):
            fp = str(item.get("fingerprint", "") or "")
            if fp:
                by_fp[fp] = item

    changed = 0
    for entry in entries:
        disk = entry.to_disk_dict()
        fp = entry.fingerprint
        if fp in by_fp and by_fp[fp] == disk:
            continue
        by_fp[fp] = disk
        changed += 1
        _log.info(
            "allowlist_writer: upserted fingerprint=%r classifier=%r check_id=%r",
            fp, entry.classifier, disk.get("check_id"),
        )

    merged = list(by_fp.values())
    content = json.dumps(merged, indent=2, ensure_ascii=False) + "\n"

    tmp_fd, tmp_path = tempfile.mkstemp(dir=str(allowlist_path.parent), suffix=".tmp")
    try:
        # The file object owns the descriptor from here on: it is closed
        # exactly once by the ``with`` block (never a second time in the
        # error path), and the buffered write() emits the whole payload
        # instead of relying on a single os.write() call.
        with os.fdopen(tmp_fd, "wb") as tmp_file:
            tmp_file.write(content.encode("utf-8"))
        os.replace(tmp_path, str(allowlist_path))
    except BaseException:
        # Best-effort removal of the temp file; the original error is what matters.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    _log.info(
        "allowlist_writer: wrote %d total entries (%d new/updated) to %s",
        len(merged), changed, allowlist_path,
    )
    return changed
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def build_pe_fp_entry_from_finding(
    finding: "GateFinding",
    *,
    reason: str,
    session_num: int,
    evidence: Optional[str] = None,
    evidence_type: str = "design_decision",
    expires_at: str = "",
    now_iso: str = "",
    project_dir: Optional[Path] = None,
    ttl_days: int = DEFAULT_PE_TTL_DAYS,
) -> FPAllowlistEntry:
    """Build a ``pe_supervisor``-classified allowlist entry from a finding.

    The applicability check is done here as well as in the writer so the
    caller gets an error that names this factory.

    The entry's file comes from the first evidence item's ``path``; its
    line is the first whitespace-separated token of that item's ``detail``
    that parses as an integer (0 if none does). ``created_at`` is stamped
    with the current epoch time. ``code_hash`` is always left empty here
    and ``project_dir`` is accepted but not used by this body.

    Args:
        finding: Finding to allowlist; must have applicability ``"unknown"``.
        reason: Justification text; stored stripped.
        session_num: Session number recorded on the entry and its snapshot.
        evidence: Optional evidence text; falls back to *reason*.
        evidence_type: Evidence category stored on the entry.
        expires_at: Expiry timestamp string; empty for none.
        now_iso: Timestamp for ``classified_at`` / ``added_at``; defaults to
            the current UTC time in ISO format.
        project_dir: Currently unused.
        ttl_days: TTL in days stored on the entry.

    Raises:
        ValueError: if ``finding.applicability`` is not ``"unknown"``.
    """
    if finding.applicability != "unknown":
        raise ValueError(
            f"build_pe_fp_entry_from_finding: finding {finding.fingerprint!r} has "
            f"applicability={finding.applicability!r} — only 'unknown' findings are "
            "eligible for PE-supervised FP classification."
        )

    timestamp = now_iso or datetime.now(tz=timezone.utc).isoformat()

    source_file = ""
    source_line = 0
    if finding.evidence:
        lead = finding.evidence[0]
        source_file = (lead.path or "").strip()
        # First integer-looking token in the detail text is taken as the line.
        for word in (lead.detail or "").split():
            try:
                source_line = int(word)
            except ValueError:
                continue
            break

    snapshot = {
        "check_id": finding.check_id,
        "confidence": finding.confidence,
        "applicability": finding.applicability,
        "applicability_reason": finding.applicability_reason,
        "analysis_mode": finding.analysis_mode,
        "session_num": session_num,
        "file": source_file,
        "line": source_line,
    }

    return FPAllowlistEntry(
        fingerprint=finding.fingerprint,
        reason=reason.strip(),
        classifier="pe_supervisor",
        classified_at=timestamp,
        session_num=session_num,
        finding_snapshot=snapshot,
        evidence_type=evidence_type,
        expires_at=expires_at,
        check_id=finding.check_id,
        file=source_file,
        line=source_line,
        evidence=(evidence or reason).strip(),
        added_by="pe_supervisor",
        added_at=timestamp,
        created_at=float(time.time()),
        ttl_days=int(ttl_days),
        code_hash="",  # standalone: code-hash stamping unavailable
    )
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""API/protocol surface forensics. Clusters 27, 28b, 29b, 30."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from .core import detect_language
|
|
5
|
+
from ...gate_models import (
|
|
6
|
+
EvidenceReference,
|
|
7
|
+
GateCategory,
|
|
8
|
+
GateFinding,
|
|
9
|
+
GateImpact,
|
|
10
|
+
GateSeverity,
|
|
11
|
+
RepairKind,
|
|
12
|
+
)
|
|
13
|
+
from ..common import build_finding
|
|
14
|
+
import logging
|
|
15
|
+
_log = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def assess_embedded_code_syntax(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 27: bracket-balance check for JS/CSS/HTML held in string constants.

    Scans Python source for upper-case names assigned a triple-quoted
    (optionally f-) string at the start of a line. Strings of at least 50
    characters that look like JS, CSS or HTML are checked for balanced
    ``()``, ``{}`` and ``[]``. The scan is purely character based: brackets
    inside embedded string literals or comments are counted too.

    Returns at most 10 findings; an empty list for blank content or for
    files not detected as Python.
    """
    import re

    if not content.strip() or detect_language(file_path) != "python":
        return []

    closer_of = {'(': ')', '{': '}', '[': ']'}
    opener_of = {closer: opener for opener, closer in closer_of.items()}
    # One pattern per embedded flavour: JS, CSS, HTML.
    flavour_patterns = (
        r'\bfunction\b|\bvar\b|\bconst\b|\blet\b|\bdocument\b|\bfetch\b|\baddEventListener\b',
        r'[.#]\w+\s*\{|:\s*\w+;|@media\b',
        r'<div\b|<span\b|<nav\b|<button\b|class="',
    )

    def _bracket_issue(text: str, var_name: str) -> str | None:
        """Return a description of the first bracket problem in *text*, if any."""
        pending: list[str] = []
        for ch in text:
            if ch in closer_of:
                pending.append(ch)
                continue
            if ch not in opener_of:
                continue
            if not pending:
                return f"Unmatched closing '{ch}' in embedded code ({var_name})"
            if pending[-1] != opener_of[ch]:
                return f"Mismatched brackets in embedded code ({var_name}): expected '{closer_of[pending[-1]]}' got '{ch}'"
            pending.pop()
        if pending:
            return f"Unclosed brackets in embedded code ({var_name}): {''.join(pending[-3:])}"
        return None

    assignment_re = re.compile(
        r'^(_?[A-Z][A-Z_0-9]*)\s*=\s*(?:f?"""(.*?)"""|f?\'\'\'(.*?)\'\'\')',
        re.MULTILINE | re.DOTALL,
    )

    findings: list[GateFinding] = []
    for match in assignment_re.finditer(content):
        var_name = match.group(1)
        embedded = match.group(2) or match.group(3) or ""
        if len(embedded) < 50:
            continue
        if not any(re.search(pattern, embedded) for pattern in flavour_patterns):
            continue
        line_num = content.count("\n", 0, match.start()) + 1
        issue = _bracket_issue(embedded, var_name)
        if not issue:
            continue
        findings.append(build_finding(
            check_id="embedded_syntax_scan",
            category=GateCategory.CONTRACT,
            title=f"[embedded_code_syntax] {file_path}:{line_num}:{var_name}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=issue,
            recommendation="Fix bracket mismatch in embedded code constant.",
            evidence=(EvidenceReference(kind="probe", detail=issue, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix embedded code syntax in {var_name} at {file_path}:{line_num}",
        ))
    return findings[:10]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# DOM / built-in property names that routinely appear on variables named
|
|
81
|
+
# ``data``/``body``/``result`` etc. but are NOT response-shape fields. Matching
|
|
82
|
+
# these as missing backend keys produces 100% FP on UI/DOM code.
|
|
83
|
+
_DOM_AND_BUILTIN_PROPS: frozenset[str] = frozenset({
|
|
84
|
+
# DOM API surface
|
|
85
|
+
"appendChild", "removeChild", "replaceChild", "insertBefore", "cloneNode",
|
|
86
|
+
"className", "classList", "id", "innerHTML", "innerText", "textContent",
|
|
87
|
+
"outerHTML", "outerText", "value", "checked", "disabled", "selected",
|
|
88
|
+
"setAttribute", "getAttribute", "removeAttribute", "hasAttribute",
|
|
89
|
+
"addEventListener", "removeEventListener", "dispatchEvent",
|
|
90
|
+
"parentNode", "parentElement", "childNodes", "children",
|
|
91
|
+
"firstChild", "lastChild", "firstElementChild", "lastElementChild",
|
|
92
|
+
"nextSibling", "previousSibling", "nextElementSibling", "previousElementSibling",
|
|
93
|
+
"style", "dataset", "tagName", "nodeType", "nodeName", "nodeValue",
|
|
94
|
+
"offsetTop", "offsetLeft", "offsetWidth", "offsetHeight", "offsetParent",
|
|
95
|
+
"clientTop", "clientLeft", "clientWidth", "clientHeight",
|
|
96
|
+
"scrollTop", "scrollLeft", "scrollWidth", "scrollHeight",
|
|
97
|
+
"focus", "blur", "click", "scrollIntoView", "remove",
|
|
98
|
+
"querySelector", "querySelectorAll", "getElementsByClassName",
|
|
99
|
+
"getElementsByTagName", "contains", "matches", "closest",
|
|
100
|
+
# Standard response / fetch surface — NOT a business field
|
|
101
|
+
"then", "catch", "finally", "json", "ok", "status", "statusText",
|
|
102
|
+
"text", "body", "blob", "formData", "arrayBuffer", "headers",
|
|
103
|
+
"redirected", "type", "url", "clone",
|
|
104
|
+
# Array / String / Object methods
|
|
105
|
+
"length", "map", "forEach", "filter", "find", "findIndex", "slice",
|
|
106
|
+
"splice", "concat", "reverse", "sort", "flat", "flatMap", "includes",
|
|
107
|
+
"every", "some", "reduce", "reduceRight",
|
|
108
|
+
"toString", "valueOf", "constructor", "prototype", "hasOwnProperty",
|
|
109
|
+
"trim", "trimStart", "trimEnd", "split", "join", "replace", "replaceAll",
|
|
110
|
+
"indexOf", "lastIndexOf", "toLowerCase", "toUpperCase", "substring",
|
|
111
|
+
"substr", "charAt", "charCodeAt", "startsWith", "endsWith", "padStart",
|
|
112
|
+
"padEnd", "repeat", "normalize",
|
|
113
|
+
"push", "pop", "shift", "unshift", "entries", "keys", "values",
|
|
114
|
+
# JS error propagation
|
|
115
|
+
"error", "message", "name", "stack", "cause",
|
|
116
|
+
# Event handler surface
|
|
117
|
+
"target", "currentTarget", "preventDefault", "stopPropagation",
|
|
118
|
+
})
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def assess_response_shape_drift(
    backend_files: dict[str, str],
    frontend_files: dict[str, str],
) -> list[GateFinding]:
    """Cluster 28b: flag frontend field reads that no backend response supplies.

    Backend keys are the quoted keys of the first ``{...}`` literal following
    each ``_send_json(`` call (the match stops at the first ``}``). Frontend
    reads are ``.<field>`` accesses on ``d``, ``resp``, ``data``, ``result``,
    ``body``, ``payload`` or ``status``, excluding names listed in
    ``_DOM_AND_BUILTIN_PROPS``.

    Returns an empty list when either input is empty, when no backend keys
    or no frontend reads are found; otherwise at most 20 findings in
    field-name order.
    """
    import re

    if not (backend_files and frontend_files):
        return []

    # DOTALL so a multi-line object literal such as ``{\n "a": 1\n}`` matches.
    payload_re = re.compile(r'_send_json\s*\([^{]*\{(.*?)\}', re.DOTALL)
    backend_keys: set[str] = set()
    for source in backend_files.values():
        for payload in payload_re.finditer(source):
            backend_keys.update(re.findall(r'["\'](\w+)["\']\s*:', payload.group(1)))
    if not backend_keys:
        return []

    access_re = re.compile(r'\b(?:d|resp|data|result|body|payload|status)\.([\w]+)\b')
    # field -> "path:line" locations; the locations are collected but are
    # not part of the emitted findings.
    frontend_access: dict[str, list[str]] = {}
    for path, source in frontend_files.items():
        for line_no, text in enumerate(source.splitlines(), 1):
            for hit in access_re.finditer(text):
                prop = hit.group(1)
                if prop not in _DOM_AND_BUILTIN_PROPS:
                    frontend_access.setdefault(prop, []).append(f"{path}:{line_no}")
    if not frontend_access:
        return []

    findings: list[GateFinding] = []
    for prop in sorted(frontend_access):
        if prop in backend_keys:
            continue
        findings.append(build_finding(
            check_id="response_shape_scan",
            category=GateCategory.DRIFT,
            title=f"[response_shape_drift] .{prop}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=f"Frontend reads '.{prop}' but no backend _send_json includes '{prop}' key",
            recommendation=f"Add '{prop}' to backend response or remove frontend read.",
            evidence=(EvidenceReference(kind="probe", detail=f"Frontend reads '.{prop}' but no backend _send_json includes '{prop}' key", ok=False),),
            repair_kind=RepairKind.NORMALIZE_SHAPE.value,
            executor_action=f"Sync response shape for field '{prop}'",
        ))
        if len(findings) >= 20:
            break
    return findings
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def assess_http_method_consistency(
    route_methods: dict[str, str],
    js_fetches: list[tuple[str, str, str]],
) -> list[GateFinding]:
    """Cluster 29b: compare each JS fetch method with its route's method.

    Each fetch is a ``(url, method, source)`` tuple. The query string is
    dropped from the URL before lookup, fetches to unregistered URLs are
    ignored, and methods are compared case-insensitively. The third tuple
    element is not used.
    """
    if not (route_methods and js_fetches):
        return []

    findings: list[GateFinding] = []
    for url, js_method, _source in js_fetches:
        route = url.partition("?")[0]
        if route not in route_methods:
            continue
        sent = js_method.upper()
        wanted = route_methods[route].upper()
        if sent == wanted:
            continue
        detail = f"JS fetches {route} with {sent} but route expects {wanted}"
        findings.append(build_finding(
            check_id="method_match_scan",
            category=GateCategory.CONTRACT,
            title=f"[http_method_consistency] {route}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=f"Change JS fetch method for {route} to {wanted}.",
            evidence=(EvidenceReference(kind="probe", detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix HTTP method mismatch for {route}",
        ))
    return findings
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def assess_js_surface_coverage(
    all_js_constants: list[str],
    checked_js_constants: list[str],
) -> list[GateFinding]:
    """Cluster 30: report JS surface constants that forensics does not check.

    Args:
        all_js_constants: Every JS constant name that exists.
        checked_js_constants: Names already covered by route/contract checks.

    Returns:
        One finding per name in *all_js_constants* that is absent from
        *checked_js_constants*, in input order; empty when there are no
        constants at all.
    """
    if not all_js_constants:
        return []

    # Build the lookup once so each membership test is O(1) rather than a
    # scan of the checked list per constant.
    covered = set(checked_js_constants)

    findings: list[GateFinding] = []
    for name in all_js_constants:
        if name in covered:
            continue
        detail = f"JS constant '{name}' exists but is not checked by route/contract forensics"
        findings.append(build_finding(
            check_id="js_coverage_scan",
            category=GateCategory.CONTRACT,
            title=f"[js_surface_coverage] {name}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=f"Add '{name}' to the checked_js_constants list in _check_js_surface_coverage.",
            evidence=(EvidenceReference(kind="probe", detail=detail, ok=False),),
            repair_kind=RepairKind.ADD_PROOF.value,
            executor_action=f"Add '{name}' to JS surface coverage checks",
        ))
    return findings
|