vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
"""Contract shape drift forensic gate (Finding 6.4).
|
|
2
|
+
|
|
3
|
+
contract_shape_drift: detect dataclass field additions and removals between
|
|
4
|
+
HEAD~1 and the working tree. Field rename is structurally undetectable (it
|
|
5
|
+
appears as a remove + add and is therefore reported as both), which is
|
|
6
|
+
intentional -- the caller is informed of each component change separately.
|
|
7
|
+
|
|
8
|
+
Severities:
|
|
9
|
+
REMOVED field -> MEDIUM (potentially breaking for serialised state)
|
|
10
|
+
ADDED field WITH default -> LOW (schema evolution, non-breaking)
|
|
11
|
+
ADDED field WITHOUT default (required) -> MEDIUM (breaking for existing records)
|
|
12
|
+
|
|
13
|
+
api.public_function_signature_change sub-check (G.9):
|
|
14
|
+
Compares public function parameter lists between prior (HEAD~1) and current.
|
|
15
|
+
- Parameter removed or renamed (positional shift) -> HIGH / REVISE
|
|
16
|
+
- No git baseline (no work tree, or no changed file resolves at HEAD~1):
|
|
17
|
+
the whole signature check is SKIPPED and reported once via
|
|
18
|
+
meta.git_unavailable. The old docstring-param-count degraded heuristic was
|
|
19
|
+
removed — it produced false positives on documented variadic APIs
|
|
20
|
+
(``option(*param_decls, **attrs)``).
|
|
21
|
+
allowlist_allowed=False for all signature findings.
|
|
22
|
+
|
|
23
|
+
F18a (2026-04-23): AI-Host-specific new-class checks moved to
|
|
24
|
+
``SYSTEM/pipeline/gates/cross_cutting_checks/ai_host_contract_checks.py``.
|
|
25
|
+
The following sub-checks are NOT emitted by this universal gate any more:
|
|
26
|
+
- ``contract_shape_drift.new_class_missing_identity``
|
|
27
|
+
- ``contract_shape_drift.new_class_missing_schema_version``
|
|
28
|
+
The helpers they depend on (``IDENTITY_FIELDS``, ``DATACLASS_RE``,
|
|
29
|
+
``_is_exempt``, ``_extract_dataclass_fields``) remain here for AI-Host's
|
|
30
|
+
``ai_host_contract_checks`` to import.
|
|
31
|
+
|
|
32
|
+
Fails open: git unavailable or any I/O error -> skip file, never crash.
|
|
33
|
+
"""
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import ast
|
|
37
|
+
import logging
|
|
38
|
+
import re
|
|
39
|
+
|
|
40
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity, RepairKind
|
|
41
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
42
|
+
from ..source_analysis import is_source_file
|
|
43
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
44
|
+
from vigil_forensic._git_utils import git_show as _git_show, git_has_repo as _git_has_repo
|
|
45
|
+
|
|
46
|
+
_log = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
# Identity fields expected on persistent @dataclass entities.
# No check in the code visible in this module reads this set; per the module
# docstring it is kept here so that AI-Host's ``ai_host_contract_checks`` can
# import it.
IDENTITY_FIELDS: frozenset[str] = frozenset(
    {"project_id", "task_id", "session_id", "attempt_id", "id", "run_id"}
)
|
|
52
|
+
|
|
53
|
+
# G.5: exemption markers in class docstring / body comments.
# A class is exempt from new-class G.5 checks if either word appears anywhere
# in its body (docstring or comment).
# The pattern is searched over the raw body text (see ``_is_exempt``), so a
# whole-word occurrence in code -- e.g. a field literally named ``internal`` --
# matches as well. Matching is case-insensitive and bounded by ``\b``.
_EXEMPT_MARKERS_RE = re.compile(r"\b(internal|non-persisted)\b", re.IGNORECASE)
|
|
57
|
+
|
|
58
|
+
# Matches the body of a @dataclass block. The pattern captures:
|
|
59
|
+
# group 1 — class name
|
|
60
|
+
# group 2 — indented body lines (one level, 4 spaces)
|
|
61
|
+
# NOTE: the pattern contains no '.', so re.DOTALL does not change what it matches; newlines are matched explicitly via '\n'.
|
|
62
|
+
DATACLASS_RE = re.compile(
|
|
63
|
+
r"@dataclass[^\n]*\nclass\s+(\w+)[^\n]*:\n((?: [^\n]*\n)*)",
|
|
64
|
+
re.DOTALL,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# A field declaration line: exactly 4-space indent + identifier + colon + type annotation.
|
|
68
|
+
# Group 1 — field name.
|
|
69
|
+
# Group 2 — always empty: the greedy ``[^\n]*`` before it already consumes the rest of the line, so callers inspect group(0) (the whole line) for '='.
|
|
70
|
+
FIELD_RE = re.compile(r"^ (\w+):\s[^\n]*(.*)", re.MULTILINE)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _is_exempt(body: str) -> bool:
    """Tell whether a dataclass body carries a G.5 exemption marker.

    The body text is searched with ``_EXEMPT_MARKERS_RE``, i.e. for the whole
    words "internal" or "non-persisted", ignoring case. A hit anywhere in the
    text (docstring, comment line, ...) exempts the class from the G.5
    new-class identity / schema_version checks.

    Args:
        body: The indented body block captured by DATACLASS_RE group 2.

    Returns:
        True when an exemption marker is found; False otherwise.
    """
    marker = _EXEMPT_MARKERS_RE.search(body)
    return marker is not None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _field_has_default(remainder: str) -> bool:
|
|
90
|
+
"""Return True if the field line contains an assignment (``=``) indicating
|
|
91
|
+
a default value or ``default_factory`` via ``field(...)``.
|
|
92
|
+
|
|
93
|
+
The ``remainder`` argument is everything on the field line after the type
|
|
94
|
+
annotation identifier. An ``=`` anywhere in that text means the field has
|
|
95
|
+
a default; its absence means the field is required.
|
|
96
|
+
"""
|
|
97
|
+
return "=" in remainder
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _extract_dataclass_fields(content: str) -> dict[str, dict[str, bool]]:
|
|
101
|
+
"""Return ``{class_name: {field_name: has_default}}`` for every @dataclass in *content*.
|
|
102
|
+
|
|
103
|
+
``has_default`` is ``True`` when the field carries a default value or
|
|
104
|
+
``default_factory`` (i.e. ``field_name: type = ...`` or
|
|
105
|
+
``field_name: type = field(default_factory=...)``). ``False`` means the
|
|
106
|
+
field is required -- adding it is a breaking change for existing records.
|
|
107
|
+
|
|
108
|
+
Only direct body lines (4-space indent) are considered to avoid matching
|
|
109
|
+
nested class or method bodies. Returns an empty dict when *content*
|
|
110
|
+
contains no dataclasses.
|
|
111
|
+
"""
|
|
112
|
+
result: dict[str, dict[str, bool]] = {}
|
|
113
|
+
for m in DATACLASS_RE.finditer(content):
|
|
114
|
+
class_name = m.group(1)
|
|
115
|
+
body = m.group(2)
|
|
116
|
+
fields: dict[str, bool] = {}
|
|
117
|
+
for field_match in FIELD_RE.finditer(body):
|
|
118
|
+
field_name = field_match.group(1)
|
|
119
|
+
line_tail = field_match.group(0) # full matched line
|
|
120
|
+
fields[field_name] = _field_has_default(line_tail)
|
|
121
|
+
result[class_name] = fields
|
|
122
|
+
return result
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# G.9: Public function signature drift helpers
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _extract_public_func_signatures(content: str) -> dict[str, list[str]]:
|
|
131
|
+
"""Return ``{func_name: [param_name, ...]}`` for every top-level public
|
|
132
|
+
function in *content*.
|
|
133
|
+
|
|
134
|
+
Only module-level ``def`` statements are considered (not class methods).
|
|
135
|
+
Names starting with ``_`` are skipped.
|
|
136
|
+
Returns empty dict on SyntaxError.
|
|
137
|
+
|
|
138
|
+
The ``self`` and ``cls`` parameters are excluded from the returned list
|
|
139
|
+
because they are not part of the public API contract.
|
|
140
|
+
"""
|
|
141
|
+
try:
|
|
142
|
+
tree = ast.parse(content)
|
|
143
|
+
except SyntaxError:
|
|
144
|
+
return {}
|
|
145
|
+
|
|
146
|
+
result: dict[str, list[str]] = {}
|
|
147
|
+
for node in ast.iter_child_nodes(tree):
|
|
148
|
+
if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
149
|
+
continue
|
|
150
|
+
if node.name.startswith("_"):
|
|
151
|
+
continue
|
|
152
|
+
params: list[str] = []
|
|
153
|
+
for arg in node.args.posonlyargs + node.args.args + node.args.kwonlyargs:
|
|
154
|
+
if arg.arg in ("self", "cls"):
|
|
155
|
+
continue
|
|
156
|
+
params.append(arg.arg)
|
|
157
|
+
if node.args.vararg:
|
|
158
|
+
params.append(f"*{node.args.vararg.arg}")
|
|
159
|
+
if node.args.kwarg:
|
|
160
|
+
params.append(f"**{node.args.kwarg.arg}")
|
|
161
|
+
result[node.name] = params
|
|
162
|
+
return result
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# _count_docstring_params was removed with the no-git degraded-mode signature
|
|
166
|
+
# heuristic (FP fix): counting :param/Args: docstring entries as a proxy for
|
|
167
|
+
# expected param count misfired on documented variadic APIs.
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _run_api_signature_checks(
|
|
171
|
+
normalized: str,
|
|
172
|
+
prior_content: str | None,
|
|
173
|
+
current_content: str,
|
|
174
|
+
) -> list:
|
|
175
|
+
"""Return findings for public function signature changes in a single file.
|
|
176
|
+
|
|
177
|
+
Compares parameter lists against the prior git snapshot. When
|
|
178
|
+
*prior_content* is None the file is new (no prior to diff) and nothing is
|
|
179
|
+
emitted — the caller only invokes this when a real git baseline exists for
|
|
180
|
+
the change set, so there is no docstring-heuristic degraded mode any more
|
|
181
|
+
(it produced false positives on documented variadic APIs).
|
|
182
|
+
"""
|
|
183
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
184
|
+
|
|
185
|
+
if prior_content is None:
|
|
186
|
+
# New file — no prior signature to diff against. Not a regression.
|
|
187
|
+
return []
|
|
188
|
+
|
|
189
|
+
findings_out = []
|
|
190
|
+
current_sigs = _extract_public_func_signatures(current_content)
|
|
191
|
+
prior_sigs = _extract_public_func_signatures(prior_content)
|
|
192
|
+
for func_name, current_params in current_sigs.items():
|
|
193
|
+
if func_name not in prior_sigs:
|
|
194
|
+
# New function — not a regression.
|
|
195
|
+
continue
|
|
196
|
+
prior_params = prior_sigs[func_name]
|
|
197
|
+
if prior_params == current_params:
|
|
198
|
+
continue
|
|
199
|
+
# Detect removed or renamed (positional mismatch) parameters.
|
|
200
|
+
removed = [p for p in prior_params if p not in current_params]
|
|
201
|
+
if not removed:
|
|
202
|
+
# Only additions — not a breaking change.
|
|
203
|
+
continue
|
|
204
|
+
findings_out.append(
|
|
205
|
+
build_finding(
|
|
206
|
+
check_id="api.public_function_signature_change",
|
|
207
|
+
category=GateCategory.DRIFT,
|
|
208
|
+
title=(
|
|
209
|
+
f"Public API signature changed: {func_name} — "
|
|
210
|
+
f"parameter(s) removed/renamed"
|
|
211
|
+
),
|
|
212
|
+
severity=GateSeverity.HIGH,
|
|
213
|
+
impact=GateImpact.REVISE,
|
|
214
|
+
summary=(
|
|
215
|
+
f"{normalized}::{func_name} — prior params: {prior_params}, "
|
|
216
|
+
f"current params: {current_params}. "
|
|
217
|
+
f"Removed/renamed: {removed}."
|
|
218
|
+
),
|
|
219
|
+
recommendation=(
|
|
220
|
+
f"Public API signature changed: {func_name}. "
|
|
221
|
+
f"Either revert, add deprecation shim, or document as breaking change."
|
|
222
|
+
),
|
|
223
|
+
evidence=[
|
|
224
|
+
EvidenceReference(
|
|
225
|
+
kind="file",
|
|
226
|
+
path=normalized,
|
|
227
|
+
detail=(
|
|
228
|
+
f"func {func_name}: prior={prior_params}, "
|
|
229
|
+
f"current={current_params}"
|
|
230
|
+
),
|
|
231
|
+
)
|
|
232
|
+
],
|
|
233
|
+
repair_kind=RepairKind.FIX_CONTRACT.value,
|
|
234
|
+
executor_action=(
|
|
235
|
+
f"Public API signature changed: {func_name}. "
|
|
236
|
+
f"Either revert, add deprecation shim, or document as breaking change."
|
|
237
|
+
),
|
|
238
|
+
proof_required=(
|
|
239
|
+
"all external callers updated; deprecation warning added if kept; "
|
|
240
|
+
"CHANGELOG entry"
|
|
241
|
+
),
|
|
242
|
+
allowlist_allowed=False,
|
|
243
|
+
)
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
return findings_out
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _dataclass_field_drift_findings(
    normalized: str,
    class_name: str,
    before_fields: dict[str, bool],
    after_fields: dict[str, bool],
) -> list:
    """Build the field-level drift findings for one dataclass present in both snapshots.

    Args:
        normalized: Normalised path of the file that declares the class.
        class_name: Name of the dataclass.
        before_fields: ``{field_name: has_default}`` from the prior snapshot.
        after_fields: ``{field_name: has_default}`` from the working tree.

    Returns:
        Up to three findings, in this order: removed fields (MEDIUM), added
        required fields (MEDIUM), added fields with defaults (LOW).
    """

    def _finding(check_id, title, severity, summary, recommendation, detail):
        # Everything except the arguments above is identical for all three findings.
        return build_finding(
            check_id=check_id,
            category=GateCategory.DRIFT,
            title=title,
            severity=severity,
            impact=GateImpact.REVISE,
            summary=summary,
            recommendation=recommendation,
            evidence=[EvidenceReference(kind="file", path=normalized, detail=detail)],
            repair_kind='fix_contract',
            executor_action='Fix contract drift',
            proof_required='Contract fields stable',
            allowlist_allowed=False,
        )

    before_names = set(before_fields)
    after_names = set(after_fields)
    removed = sorted(before_names - after_names)
    added = after_names - before_names
    # Split added fields: required (no default) vs optional (has default).
    required_added = sorted(name for name in added if not after_fields[name])
    optional_added = sorted(name for name in added if after_fields[name])

    out = []
    if removed:
        out.append(
            _finding(
                "contract_shape_drift.field_removed",
                "Dataclass field(s) removed -- potential breaking schema change",
                GateSeverity.MEDIUM,
                (
                    f"{normalized}::{class_name} -- removed field(s): "
                    f"{', '.join(removed)}."
                ),
                (
                    "Removing dataclass fields can break deserialisation of persisted "
                    "state. Verify that no stored artefacts rely on the removed "
                    f"field(s) before merging this change to {class_name}."
                ),
                f"class {class_name}: removed {removed}",
            )
        )
    if required_added:
        out.append(
            _finding(
                "contract_shape_drift.required_field_added",
                "Dataclass required field(s) added -- breaking schema change",
                GateSeverity.MEDIUM,
                (
                    f"{normalized}::{class_name} -- added required field(s) (no default): "
                    f"{', '.join(required_added)}."
                ),
                (
                    "Adding required fields (no default value) breaks deserialisation of "
                    "existing persisted records and all existing construction sites. "
                    f"Add a default value to each new field in {class_name}, or perform "
                    "a coordinated migration of all persisted state."
                ),
                f"class {class_name}: added required {required_added}",
            )
        )
    if optional_added:
        out.append(
            _finding(
                "contract_shape_drift.field_added",
                "Dataclass field(s) added -- schema evolution detected",
                GateSeverity.LOW,
                (
                    f"{normalized}::{class_name} -- added field(s) with defaults: "
                    f"{', '.join(optional_added)}."
                ),
                (
                    "New dataclass fields are non-breaking when they carry defaults. "
                    "Confirm that the new field(s) have default values or that all "
                    f"construction sites of {class_name} have been updated."
                ),
                f"class {class_name}: added optional {optional_added}",
            )
        )
    return out


def run_contract_shape_drift_checks(ctx: PostExecGateContext):
    """Run the contract-shape drift gate over the changed source files.

    Two sub-checks are performed, and their findings are returned in this order:

    1. Dataclass field drift. For each changed path accepted by
       ``is_source_file``:
         - fetch the prior content via ``_git_show``;
         - extract the @dataclass field maps before and after;
         - for classes present in both snapshots, compare the field sets:
             * REMOVED fields                 -> MEDIUM finding,
             * ADDED fields without a default -> MEDIUM finding,
             * ADDED fields with a default    -> LOW finding.
       Files without prior content (new files) and classes that disappeared
       entirely are skipped.

    2. ``api.public_function_signature_change`` (G.9). Runs only when
       ``_git_has_repo(ctx.project_dir)`` is true and at least one changed
       source file has prior content. Otherwise the whole sub-check is skipped
       and reported once through ``emit_meta_finding("meta.git_unavailable", ...)``.

    The prior snapshot and the current text of every file are obtained once and
    shared by both sub-checks, so each file costs a single ``_git_show`` call
    and a single disk read.

    Fails open: a file whose current content cannot be read or decoded is
    logged at debug level and skipped.

    Args:
        ctx: Gate context; ``changed_files_observed`` and ``project_dir`` are read.

    Returns:
        The value of ``build_check_result`` for check id ``contract_shape_drift``
        carrying all findings.
    """
    findings = []

    source_paths = []
    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if is_source_file(normalized):
            source_paths.append(normalized)

    # Per-path caches shared by both sub-checks.
    priors: dict[str, str | None] = {}
    currents: dict[str, str] = {}

    # --- Sub-check 1: dataclass field drift ---------------------------------
    for normalized in source_paths:
        if normalized not in priors:
            prior = _git_show(normalized)
            priors[normalized] = prior
            if prior is not None:
                try:
                    currents[normalized] = (ctx.project_dir / normalized).read_text(
                        encoding="utf-8"
                    )
                except (OSError, UnicodeDecodeError) as exc:
                    _log.debug(
                        "contract_shape_drift: cannot read current file %s: %s",
                        normalized,
                        exc,
                    )

        prior = priors[normalized]
        current = currents.get(normalized)
        if prior is None or current is None:
            # New file (nothing to diff against) or unreadable current file.
            # AI-Host-specific "new class missing identity / schema_version"
            # checks moved to SYSTEM/pipeline/gates/cross_cutting_checks/
            # ai_host_contract_checks.py (F18a).
            continue

        before_map = _extract_dataclass_fields(prior)
        after_map = _extract_dataclass_fields(current)
        for class_name, before_fields in before_map.items():
            after_fields = after_map.get(class_name)
            if after_fields is None:
                # Entire class removed — out of scope for field-level drift.
                continue
            findings.extend(
                _dataclass_field_drift_findings(
                    normalized, class_name, before_fields, after_fields
                )
            )

    # --- Sub-check 2 (G.9): api.public_function_signature_change -------------
    #
    # FP fix: signature-drift is only meaningful against a git baseline. The old
    # degraded path fell back to a docstring-param-count heuristic that misfired
    # on documented variadic APIs (``option(*param_decls, **attrs)`` with a
    # 3-param docstring → "0 params vs 3 documented"; verified on click/mcp).
    #
    # "No baseline" covers two cases that both produce only false positives:
    #   1. the target is not in a git work tree at all, OR
    #   2. it is inside a work tree but NONE of the changed files have prior
    #      content at HEAD~1 (e.g. a gitignored vendored / site-packages dir).
    # In either case, skip the whole signature check and surface the skip ONCE
    # via meta.git_unavailable instead of emitting per-file FPs.
    #
    # NOTE(review): with no changed source files at all there is no baseline
    # either, so meta.git_unavailable is emitted in that case too — confirm
    # that this is intended.
    has_baseline = _git_has_repo(ctx.project_dir) and any(
        prior is not None for prior in priors.values()
    )

    if not has_baseline:
        # Imported here, as in the original code, rather than at module level.
        from vigil_forensic.meta_findings import emit_meta_finding

        emit_meta_finding(
            "meta.git_unavailable",
            path=str(ctx.project_dir),
            detail=(
                "api.public_function_signature_change skipped: no git baseline "
                "available (signature-drift needs HEAD~1 to be meaningful)."
            ),
        )
    else:
        for normalized in source_paths:
            prior = priors.get(normalized)
            current = currents.get(normalized)
            if prior is None or current is None:
                # A new file in an otherwise-baselined repo is a no-op for the
                # signature check; an unreadable file was already logged above.
                continue
            findings.extend(_run_api_signature_checks(normalized, prior, current))

    return build_check_result(
        check_id="contract_shape_drift",
        category=GateCategory.DRIFT,
        findings=findings,
    )
|