vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""forensic_clusters package -- public API.
|
|
2
|
+
|
|
3
|
+
Sub-modules:
|
|
4
|
+
core -- types, language detection, clusters 1-9
|
|
5
|
+
edit_mutation -- clusters 10-17
|
|
6
|
+
dead_code -- clusters 20, 23
|
|
7
|
+
    code_style -- clusters 21, 22, 24, 25, 26, 28, 29
    allowlist -- false-positive allowlist helpers (imported below from .allowlist)
|
|
8
|
+
api_protocol -- clusters 27, 28b, 29b, 30
|
|
9
|
+
exception_boundary -- clusters 31, 32, 33
|
|
10
|
+
static_analysis -- clusters 34-38
|
|
11
|
+
async_quality -- clusters 39-43
|
|
12
|
+
data_quality -- clusters 44-50
|
|
13
|
+
legacy_debt -- cluster 53: legacy compatibility debt
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
# -- core --
|
|
18
|
+
from .core import (
|
|
19
|
+
ProofRequirement,
|
|
20
|
+
detect_language,
|
|
21
|
+
assess_success_proof,
|
|
22
|
+
assess_source_truthfulness,
|
|
23
|
+
assess_config_applied,
|
|
24
|
+
assess_state_consistency,
|
|
25
|
+
assess_fallback_transparency,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# -- edit_mutation --
|
|
29
|
+
from .edit_mutation import (
|
|
30
|
+
assess_edit_consistency,
|
|
31
|
+
assess_mutation_verified,
|
|
32
|
+
assess_security_patterns,
|
|
33
|
+
assess_test_quality,
|
|
34
|
+
assess_import_cycles,
|
|
35
|
+
assess_roundtrip_consistency,
|
|
36
|
+
assess_shared_mutable_state,
|
|
37
|
+
assess_dependency_vulnerabilities,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# -- dead_code --
|
|
41
|
+
from .dead_code import (
|
|
42
|
+
DeadCodeItem,
|
|
43
|
+
assess_dead_code,
|
|
44
|
+
classify_dead_code_item,
|
|
45
|
+
assess_unused_imports,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# -- code_style --
|
|
49
|
+
from .code_style import (
|
|
50
|
+
assess_secrets_in_code,
|
|
51
|
+
assess_magic_numbers,
|
|
52
|
+
assess_error_message_quality,
|
|
53
|
+
assess_naming_consistency,
|
|
54
|
+
assess_todo_debt,
|
|
55
|
+
assess_log_level_quality,
|
|
56
|
+
assess_encoding_consistency,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# -- allowlist --
|
|
60
|
+
from .allowlist import (
|
|
61
|
+
AllowlistEntry,
|
|
62
|
+
load_allowlist,
|
|
63
|
+
revalidate_allowlist,
|
|
64
|
+
save_allowlist,
|
|
65
|
+
filter_by_allowlist,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# -- api_protocol --
|
|
69
|
+
from .api_protocol import (
|
|
70
|
+
assess_embedded_code_syntax,
|
|
71
|
+
assess_response_shape_drift,
|
|
72
|
+
assess_http_method_consistency,
|
|
73
|
+
assess_js_surface_coverage,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# -- exception_boundary --
|
|
77
|
+
from .exception_boundary import (
|
|
78
|
+
assess_exception_swallowing,
|
|
79
|
+
assess_hardcoded_paths,
|
|
80
|
+
assess_boundary_validation,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# -- static_analysis --
|
|
84
|
+
from .static_analysis import (
|
|
85
|
+
assess_unreachable_code,
|
|
86
|
+
assess_shadowed_builtins,
|
|
87
|
+
assess_mutable_defaults,
|
|
88
|
+
assess_resource_leaks,
|
|
89
|
+
assess_docstring_params,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# -- async_quality --
|
|
93
|
+
from .async_quality import (
|
|
94
|
+
assess_broad_catch_no_reraise,
|
|
95
|
+
assess_debug_prints,
|
|
96
|
+
assess_commented_code,
|
|
97
|
+
assess_missing_await,
|
|
98
|
+
assess_unchecked_response,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# -- data_quality --
|
|
102
|
+
from .data_quality import (
|
|
103
|
+
assess_naive_timezone,
|
|
104
|
+
assess_near_duplicate_code,
|
|
105
|
+
assess_missing_null_check,
|
|
106
|
+
assess_path_concatenation,
|
|
107
|
+
assess_log_without_context,
|
|
108
|
+
assess_test_secrets,
|
|
109
|
+
assess_unpinned_dependencies,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# -- legacy_debt (C53) --
|
|
113
|
+
from .legacy_debt import (
|
|
114
|
+
check_forwarding_wrapper,
|
|
115
|
+
check_unused_shim_module,
|
|
116
|
+
check_stale_migration_marker,
|
|
117
|
+
check_shape_adapter_without_producer,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Public API of the package: every name listed here is imported above from
# one of the sub-modules, grouped in the same order as those imports.
__all__ = [
    # types (ProofRequirement from core, DeadCodeItem from dead_code,
    # AllowlistEntry from allowlist)
    "ProofRequirement",
    "DeadCodeItem",
    "AllowlistEntry",
    # utility (core)
    "detect_language",
    # allowlist management (allowlist)
    "load_allowlist",
    "revalidate_allowlist",
    "save_allowlist",
    "filter_by_allowlist",
    # assess functions — universal integrity clusters (2,3,4,6,7)
    "assess_success_proof",
    "assess_source_truthfulness",
    "assess_config_applied",
    "assess_state_consistency",
    "assess_fallback_transparency",
    # assess functions — edit/mutation/static clusters 10-17
    "assess_edit_consistency",
    "assess_mutation_verified",
    "assess_security_patterns",
    "assess_test_quality",
    "assess_import_cycles",
    "assess_roundtrip_consistency",
    "assess_shared_mutable_state",
    "assess_dependency_vulnerabilities",
    # assess functions — dead code / unused imports 20, 23
    "assess_dead_code",
    "classify_dead_code_item",
    "assess_unused_imports",
    # assess functions — code style 21, 22, 24, 25, 26, 28, 29
    "assess_secrets_in_code",
    "assess_magic_numbers",
    "assess_error_message_quality",
    "assess_naming_consistency",
    "assess_todo_debt",
    "assess_log_level_quality",
    "assess_encoding_consistency",
    # assess functions — api protocol 27, 28b, 29b, 30
    "assess_embedded_code_syntax",
    "assess_response_shape_drift",
    "assess_http_method_consistency",
    "assess_js_surface_coverage",
    # assess functions — exception/boundary 31-33
    "assess_exception_swallowing",
    "assess_hardcoded_paths",
    "assess_boundary_validation",
    # assess functions — static analysis 34-38
    "assess_unreachable_code",
    "assess_shadowed_builtins",
    "assess_mutable_defaults",
    "assess_resource_leaks",
    "assess_docstring_params",
    # assess functions — async quality 39-43
    "assess_broad_catch_no_reraise",
    "assess_debug_prints",
    "assess_commented_code",
    "assess_missing_await",
    "assess_unchecked_response",
    # assess functions — data quality 44-50
    "assess_naive_timezone",
    "assess_near_duplicate_code",
    "assess_missing_null_check",
    "assess_path_concatenation",
    "assess_log_without_context",
    "assess_test_secrets",
    "assess_unpinned_dependencies",
    # check functions — legacy compatibility debt C53 (legacy_debt)
    "check_forwarding_wrapper",
    "check_unused_shim_module",
    "check_stale_migration_marker",
    "check_shape_adapter_without_producer",
]
|
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""False-positive allowlist infrastructure.
|
|
2
|
+
|
|
3
|
+
Provides AllowlistEntry, load_allowlist, revalidate_allowlist, save_allowlist,
|
|
4
|
+
and filter_by_allowlist for managing known false positives in forensic checks.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
import logging
|
|
12
|
+
_log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# False Positive Allowlist
# ---------------------------------------------------------------------------

# Evidence kinds accepted by AllowlistEntry.is_valid(); an entry whose
# `evidence_type` is anything else is treated as invalid.
_VALID_EVIDENCE_TYPES = frozenset({
    "grep_proof",       # agent must provide grep result
    "ast_proof",        # function is called via specific pattern
    "context_proof",    # number/pattern used in specific context
    "design_decision",  # deliberate architectural choice
})
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Sprint (2026-04-24): default TTL for PE-classifier safety mechanisms.
# An entry is dropped at load time once `created_at + ttl_days * 86400` is in
# the past (see _filter_expired_or_drifted). That check reads only
# `created_at` and `ttl_days`: a non-positive `ttl_days` or `created_at`
# disables it, and `expires_at` is a separate check made by
# revalidate_allowlist. Entries without `created_at` (legacy data) are stamped
# with the current time every time load_allowlist reads them, so they count as
# fresh until a stamped copy is written back by save_allowlist.
DEFAULT_PE_TTL_DAYS = 30
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class AllowlistEntry:
    """Immutable record of one finding acknowledged as a false positive.

    Findings are matched against entries by ``fingerprint``. The remaining
    fields carry the justification and the bookkeeping used to decide
    whether the entry may still be honored.
    """

    fingerprint: str
    check_id: str
    file: str
    line: int
    reason: str
    evidence_type: str
    evidence: str
    added_by: str = ""
    added_at: str = ""
    reviewed_by: str = ""
    expires_at: str = ""
    # Sprint D1 (2026-04-23): identity of the classifier for entries written
    # by the PE supervisor. Backward compatible: entries that lack these
    # fields load with the defaults and are treated as classifier="executor".
    classifier: str = ""  # "executor" | "pe_supervisor" | "human" | ""
    classified_at: str = ""  # ISO-8601 UTC
    # Sprint (2026-04-24): TTL + code-hash safety mechanisms for the PE
    # classifier. `created_at` is epoch seconds (float) and `ttl_days`
    # defaults to 30; once `created_at + ttl_days * 86400` has passed, the
    # entry is filtered out at read time. `code_hash` is the SHA-256 hex of
    # the evidence file at write time; an empty `code_hash` skips the hash
    # comparison (the TTL still applies).
    created_at: float = 0.0
    ttl_days: int = DEFAULT_PE_TTL_DAYS
    code_hash: str = ""

    def is_valid(self) -> bool:
        """Return True only when the entry carries real proof.

        Both ``reason`` and ``evidence`` must be at least ten characters
        long, and ``evidence_type`` must be one of the recognised kinds.
        """
        min_chars = 10
        return (
            len(self.reason or "") >= min_chars
            and self.evidence_type in _VALID_EVIDENCE_TYPES
            and len(self.evidence or "") >= min_chars
        )

    def to_dict(self) -> dict[str, object]:
        """Return the entry as a plain dict, one key per field, in field order."""
        serialized: dict[str, object] = {}
        serialized["fingerprint"] = self.fingerprint
        serialized["check_id"] = self.check_id
        serialized["file"] = self.file
        serialized["line"] = self.line
        serialized["reason"] = self.reason
        serialized["evidence_type"] = self.evidence_type
        serialized["evidence"] = self.evidence
        serialized["added_by"] = self.added_by
        serialized["added_at"] = self.added_at
        serialized["reviewed_by"] = self.reviewed_by
        serialized["expires_at"] = self.expires_at
        serialized["classifier"] = self.classifier
        serialized["classified_at"] = self.classified_at
        serialized["created_at"] = self.created_at
        serialized["ttl_days"] = self.ttl_days
        serialized["code_hash"] = self.code_hash
        return serialized
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
_BOOTSTRAP_TEMPLATE: dict[str, object] = {
|
|
93
|
+
"_doc": (
|
|
94
|
+
"False positive allowlist for forensic gates. Add entries via Write tool when "
|
|
95
|
+
"a gate finding is verified as a false positive."
|
|
96
|
+
),
|
|
97
|
+
"_format": (
|
|
98
|
+
"list of {check_id, evidence: {kind: 'file_exists'|'mutation_verified'|...}, "
|
|
99
|
+
"justification: str, added_at: ISO_DATE}"
|
|
100
|
+
),
|
|
101
|
+
"entries": [],
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _bootstrap_allowlist_template(path: Path) -> None:
    """Create an empty allowlist template at *path* if it does not exist.

    Idempotent: returns immediately when the file is already there. The
    template is written to a temporary file in the same directory and moved
    into place with ``os.replace``, so a concurrent reader never observes a
    partially written file.

    An ``OSError`` during bootstrap (mkdir, write, replace) is logged as a
    warning and not propagated; the caller then simply proceeds without an
    allowlist file. Any other exception propagates after the temporary file
    has been removed.

    NOTE(review): the existence check and the final replace are not one
    atomic step, so a file created by another process in between would be
    overwritten by the empty template.

    Args:
        path: Destination of the allowlist JSON file.
    """
    import json as _json
    import os as _os
    import tempfile as _tempfile

    if path.exists():
        return
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        payload = (
            _json.dumps(_BOOTSTRAP_TEMPLATE, indent=2, ensure_ascii=False) + "\n"
        ).encode("utf-8")
        tmp_fd, tmp_path = _tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
        try:
            # fdopen takes ownership of the descriptor, so it is closed
            # exactly once (also on error), and the buffered writer keeps
            # writing until the whole payload is out. Binary mode keeps the
            # "\n" line endings on every platform.
            with _os.fdopen(tmp_fd, "wb") as tmp_file:
                tmp_file.write(payload)
            _os.replace(tmp_path, str(path))
        except BaseException:
            # Best-effort removal of the temp file; after a successful
            # replace it no longer exists and this is a no-op.
            try:
                _os.unlink(tmp_path)
            except OSError:
                pass
            raise
    except OSError as exc:
        _log.warning(
            "allowlist: could not bootstrap empty template at %s (%s); "
            "fresh project will see no allowlist on first gate run.",
            path, exc,
        )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def load_allowlist(project_dir: Path) -> list[AllowlistEntry]:
    """Load false positive allowlist from .prompt-engineer/forensic_gates/.

    The file may be either a bare JSON list of entries (what save_allowlist
    writes) or a JSON object holding that list under ``"entries"`` (what the
    bootstrap template looks like), so entries added to a freshly
    bootstrapped file are honored.

    B3 wave: an unreadable file, undecodable bytes, invalid JSON, an
    unexpected top-level shape, or an entry that cannot be coerced is
    reported through a ``meta.allowlist_corrupted`` meta finding instead of
    silently yielding ``[]``, so operators can see that acknowledged false
    positives are not being honored. A malformed entry is skipped; the
    remaining entries still load.

    Sprint C1 (2026-04-25): when the file does not exist, an empty template
    is bootstrapped at the canonical path and ``[]`` is returned.

    Entries without ``created_at`` (legacy data) are stamped with the
    current time on every load, so the TTL does not fire on them here.

    Args:
        project_dir: Project root containing the ``.prompt-engineer`` folder.

    Returns:
        The parsed entries that survive the TTL / code-hash filter.
    """
    import json as _json
    import time as _time_mod

    path = project_dir / ".prompt-engineer" / "forensic_gates" / "false_positive_allowlist.json"

    def _report_corruption(detail: str) -> None:
        # Imported lazily, as this module did before, so that merely
        # importing the allowlist module does not pull in meta_findings.
        from vigil_forensic.meta_findings import emit_meta_finding
        emit_meta_finding(
            "meta.allowlist_corrupted",
            path=str(path),
            detail=detail,
        )

    if not path.exists():
        _bootstrap_allowlist_template(path)
        return []

    try:
        raw_text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: a file that is
        # not valid UTF-8 must be reported, not crash the gate run.
        _report_corruption(f"{type(exc).__name__}: {exc}")
        return []

    try:
        data = _json.loads(raw_text)
    except _json.JSONDecodeError as exc:
        _report_corruption(f"JSONDecodeError: {exc}")
        return []

    # Accept both on-disk shapes: a bare list, or the template object.
    raw_items = data.get("entries") if isinstance(data, dict) else data
    if not isinstance(raw_items, list):
        _report_corruption(
            f"unexpected top-level shape ({type(data).__name__}); expected a "
            "list of entries or an object with an 'entries' list"
        )
        return []

    now = float(_time_mod.time())
    entries: list[AllowlistEntry] = []
    for item in raw_items:
        try:
            # Backward compat: legacy entries without `created_at` are
            # treated as fresh (current time) so the TTL does not fire
            # immediately on entries written before the safety mechanism.
            raw_created = item.get("created_at", None)
            if raw_created is None or raw_created == "":
                created_at = now
            else:
                created_at = float(raw_created)

            raw_ttl = item.get("ttl_days", DEFAULT_PE_TTL_DAYS)
            try:
                ttl_days = int(raw_ttl) if raw_ttl not in (None, "") else DEFAULT_PE_TTL_DAYS
            except (TypeError, ValueError, OverflowError):
                # OverflowError: JSON "Infinity" parses to float('inf').
                ttl_days = DEFAULT_PE_TTL_DAYS

            entry = AllowlistEntry(
                fingerprint=str(item.get("fingerprint", "")),
                check_id=str(item.get("check_id", "")),
                file=str(item.get("file", "")),
                line=int(item.get("line", 0)),
                reason=str(item.get("reason", "")),
                evidence_type=str(item.get("evidence_type", "")),
                evidence=str(item.get("evidence", "")),
                added_by=str(item.get("added_by", "")),
                added_at=str(item.get("added_at", "")),
                reviewed_by=str(item.get("reviewed_by", "")),
                expires_at=str(item.get("expires_at", "")),
                classifier=str(item.get("classifier", "")),
                classified_at=str(item.get("classified_at", "")),
                created_at=created_at,
                ttl_days=ttl_days,
                code_hash=str(item.get("code_hash", "")),
            )
        except (AttributeError, TypeError, ValueError, OverflowError) as exc:
            # AttributeError covers items that are not JSON objects at all.
            _report_corruption(
                f"{type(exc).__name__} coercing entry {item!r}: {exc}"
            )
            continue
        entries.append(entry)
    return _filter_expired_or_drifted(entries, project_dir)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _filter_expired_or_drifted(
    entries: list[AllowlistEntry],
    project_dir: Path,
) -> list[AllowlistEntry]:
    """Return *entries* minus those past their TTL or with a drifted code hash.

    Sprint (2026-04-24) — Mechanism 1 (TTL) + Mechanism 2 (code-hash
    invalidation). Both checks fail soft: an entry is dropped only in the
    explicit "expired" or "hash-mismatch" cases and is kept otherwise
    (non-positive ``ttl_days`` or ``created_at``, NaN timestamp, empty
    ``code_hash``, missing file).

    In this standalone build no hash function is wired in
    (``compute_code_hash`` is ``None``), so the hash comparison never runs
    and only the TTL can drop an entry.
    """
    import time as _time_mod

    # standalone: code-hash stamping unavailable
    compute_code_hash = None  # type: ignore[assignment]

    seconds_per_day = 86400.0
    now = float(_time_mod.time())

    def _ttl_expired(candidate: AllowlistEntry) -> bool:
        # The TTL only applies when both the TTL and the creation stamp are
        # positive; anything else keeps the entry.
        if not (candidate.ttl_days > 0 and candidate.created_at > 0.0):
            return False
        age = now - candidate.created_at
        if age > float(candidate.ttl_days) * seconds_per_day:
            _log.debug(
                "allowlist: dropping fingerprint=%r — TTL expired (%.1f days > %d days)",
                candidate.fingerprint, age / seconds_per_day, candidate.ttl_days,
            )
            return True
        return False

    def _hash_drifted(candidate: AllowlistEntry) -> bool:
        # Needs a stamped hash and a file path; otherwise nothing to compare.
        if not (candidate.code_hash and candidate.file):
            return False
        try:
            target = project_dir / candidate.file
        except (TypeError, ValueError):
            target = None
        if target is None or not target.is_file() or compute_code_hash is None:
            return False
        current = compute_code_hash(target)
        if current and current != candidate.code_hash:
            _log.debug(
                "allowlist: dropping fingerprint=%r — code_hash drift "
                "(stamped=%s now=%s)",
                candidate.fingerprint, candidate.code_hash[:12], current[:12],
            )
            return True
        return False

    # `or` short-circuits: an entry already dropped by the TTL is not hashed.
    return [
        candidate
        for candidate in entries
        if not (_ttl_expired(candidate) or _hash_drifted(candidate))
    ]
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def revalidate_allowlist(
    project_dir: Path,
    allowlist: list[AllowlistEntry],
) -> tuple[list[AllowlistEntry], list[AllowlistEntry]]:
    """Revalidate allowlist entries against current project state.

    An entry is invalidated if any of the following holds:
      - ``entry.is_valid()`` is false (no real proof)
      - ``expires_at`` parses to a moment in the past
      - its file no longer exists or cannot be read
      - its line number lies beyond the end of the file
      - it is a ``grep_proof`` and none of the first three key words of the
        evidence (alphanumeric, longer than four characters) occurs in the
        file any more

    An ``expires_at`` value that cannot be parsed or converted is ignored
    (the entry is not expired by it). A value without a UTC offset is
    interpreted in local time by ``datetime.timestamp()``.

    Args:
        project_dir: Project root that ``entry.file`` is relative to.
        allowlist: Entries to check.

    Returns:
        ``(still_valid, invalidated)``, each in the order of *allowlist*.
    """
    import datetime as _datetime
    import time as _time

    now = _time.time()
    # Several entries often point at the same file, so each file is read at
    # most once per call. None records "missing or unreadable".
    text_by_file: dict[str, Optional[str]] = {}

    def _file_text(rel_path: str) -> Optional[str]:
        if rel_path not in text_by_file:
            target = project_dir / rel_path
            try:
                text_by_file[rel_path] = (
                    target.read_text(encoding="utf-8", errors="replace")
                    if target.exists()
                    else None
                )
            except OSError:
                # Directory, permission problem, vanished file, ...
                text_by_file[rel_path] = None
        return text_by_file[rel_path]

    def _has_expired(expires_at: str) -> bool:
        if not expires_at:
            return False
        try:
            # fromisoformat() before Python 3.11 rejects a trailing "Z".
            expiry = _datetime.datetime.fromisoformat(expires_at.replace("Z", "+00:00"))
            return expiry.timestamp() < now
        except (ValueError, TypeError, OverflowError, OSError):
            # Fail soft on a malformed or out-of-range date: timestamp() may
            # raise OverflowError/OSError for naive values far from the epoch.
            return False

    def _still_holds(entry: AllowlistEntry) -> bool:
        if not entry.is_valid() or _has_expired(entry.expires_at):
            return False

        content = _file_text(entry.file)
        if content is None:
            return False

        # Newline count + 1 is always >= 1, so a line of 0 or less never
        # trips this check.
        line_count = content.count("\n") + 1
        if entry.line > line_count:
            return False

        if entry.evidence_type == "grep_proof":
            # Evidence is free text ("grep shows X used at line Y"); use its
            # first few substantial words as a cheap still-present probe.
            evidence_words = [
                w for w in entry.evidence.split()
                if len(w) > 4 and w.isalnum()
            ]
            if evidence_words and not any(w in content for w in evidence_words[:3]):
                return False
        return True

    still_valid: list[AllowlistEntry] = []
    invalidated: list[AllowlistEntry] = []
    for entry in allowlist:
        (still_valid if _still_holds(entry) else invalidated).append(entry)
    return still_valid, invalidated
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def save_allowlist(project_dir: Path, entries: list[AllowlistEntry]) -> Path:
    """Write allowlist back to disk (after revalidation cleanup).

    The entries are serialized as a JSON list of ``to_dict()`` results and
    written to a temporary file next to the destination, which is then moved
    into place with ``os.replace`` so readers never see a partial file.

    Args:
        project_dir: Project root containing the ``.prompt-engineer`` folder.
        entries: Entries to persist; an empty list writes ``[]``.

    Returns:
        Path of the allowlist file that was written.

    Raises:
        OSError: If the directory, temporary file or final replace fails.
            The temporary file is removed before the error propagates.
    """
    import json as _json
    import os as _os
    import tempfile as _tempfile

    path = project_dir / ".prompt-engineer" / "forensic_gates" / "false_positive_allowlist.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = (
        _json.dumps([e.to_dict() for e in entries], indent=2, ensure_ascii=False) + "\n"
    ).encode("utf-8")
    tmp_fd, tmp_path = _tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
    try:
        # fdopen takes ownership of the descriptor, so it is closed exactly
        # once (also on error), and the buffered writer keeps writing until
        # the whole payload is out. Binary mode keeps "\n" on every platform.
        with _os.fdopen(tmp_fd, "wb") as tmp_file:
            tmp_file.write(payload)
        _os.replace(tmp_path, str(path))
    except BaseException:
        # Best-effort removal of the temp file before re-raising.
        try:
            _os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return path
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def filter_by_allowlist(
    findings: list,  # list of GateFinding
    allowlist: list[AllowlistEntry],
    project_dir: Optional[Path] = None,
) -> tuple[list, list, list[str]]:
    """Split findings into (remaining, filtered_out, notes) using the allowlist.

    A finding is filtered out when its ``fingerprint`` equals that of an
    entry which has a non-empty fingerprint and passes ``is_valid()``.
    Entries without a fingerprint are never honored, so they cannot suppress
    findings whose own fingerprint happens to be empty.

    If *project_dir* is given, the entries are revalidated first; stale ones
    are ignored and, when any were found, the cleaned list is written back
    to disk. A failure to write is logged and recorded in *notes* but does
    not stop the filtering.

    Args:
        findings: Findings to split; each must expose ``fingerprint``.
        allowlist: Candidate allowlist entries.
        project_dir: Project root; enables revalidation and cleanup.

    Returns:
        ``(remaining, filtered_out, notes)``. When nothing can be filtered,
        ``remaining`` is the *findings* list itself.
    """
    notes: list[str] = []

    if not allowlist:
        return findings, [], notes

    # Revalidate if project_dir available
    if project_dir is not None:
        valid_entries, invalidated = revalidate_allowlist(project_dir, allowlist)
        if invalidated:
            notes.append(
                f"[allowlist] Removed {len(invalidated)} stale/invalid entries "
                f"(file deleted, proof invalidated, or expired)"
            )
            # Save cleaned allowlist back to disk. Persisting is best
            # effort, but a failure must be visible: the stale entries are
            # still in the file and will be revalidated again next run.
            try:
                save_allowlist(project_dir, valid_entries)
            except OSError as exc:
                _log.warning(
                    "allowlist: could not persist cleaned allowlist (%s: %s)",
                    type(exc).__name__, exc,
                )
                notes.append(
                    "[allowlist] Could not persist cleaned allowlist "
                    f"({type(exc).__name__}: {exc}); stale entries remain on disk"
                )
        allowlist = valid_entries

    valid_fps = {
        entry.fingerprint
        for entry in allowlist
        if entry.fingerprint and entry.is_valid()
    }

    if not valid_fps:
        return findings, [], notes

    remaining = []
    filtered = []
    for finding in findings:
        if finding.fingerprint in valid_fps:
            filtered.append(finding)
        else:
            remaining.append(finding)

    return remaining, filtered, notes
|