vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,716 @@
|
|
|
1
|
+
"""Findings builder -- synthesizes map entries into diagnostic findings.
|
|
2
|
+
|
|
3
|
+
Map 8: Findings synthesizes from all 7 maps (structural, data_contract, authority,
|
|
4
|
+
runtime, conflict, hotspot, refactor_boundary) and produces actionable findings
|
|
5
|
+
for operators.
|
|
6
|
+
|
|
7
|
+
Patterns:
|
|
8
|
+
- architecture_cycle: SCC cluster + fan_in >= 5 + hotspot score >= 60
|
|
9
|
+
- state_ownership_conflict: shared_write conflict + runtime node + 2+ production modules
|
|
10
|
+
- schema_drift_risk: contract_drift + multiple readers >= 2
|
|
11
|
+
- runtime_config_risk: env_coupling conflict + env_var not in contract
|
|
12
|
+
- write_authority_violation: illegal_write + verified target (path_constructor provenance)
|
|
13
|
+
|
|
14
|
+
Lifecycle:
|
|
15
|
+
- new: first time seeing this finding_id
|
|
16
|
+
- existing: same finding_id, same severity
|
|
17
|
+
- worsened: same finding_id, severity increased
|
|
18
|
+
- resolved: previous finding_id not in current output
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import hashlib
|
|
23
|
+
import json
|
|
24
|
+
import logging
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from .map_models import RepoMaps
|
|
29
|
+
from .map_models_findings import Finding, EvidenceItem
|
|
30
|
+
from .map_storage import maps_dir
|
|
31
|
+
|
|
32
|
+
__all__ = ["build_findings_map"]
|
|
33
|
+
|
|
34
|
+
_log = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def build_findings_map(
    project_dir: Path,
    repo_maps: RepoMaps,
    maps_dir_override: Path | None = None,
) -> list[Finding]:
    """Build the findings map by running every synthesis pattern over the maps.

    The previously persisted findings (if any) are loaded first so each freshly
    synthesized finding can be tagged with a lifecycle status. Every previous
    finding whose id is absent from the current run is appended as a
    ``resolved`` copy.

    Args:
        project_dir: Path to the target project root (resolved to absolute here).
        repo_maps: RepoMaps object containing all built maps.
        maps_dir_override: Optional override for maps directory (for --output-dir).

    Returns:
        list[Finding]: Findings of the current run in pattern order, followed by
        the resolved copies of previous findings that no longer fire.
    """
    project_dir = Path(project_dir).resolve()
    _log.info("build_findings_map: starting for %s", project_dir)

    # Previous findings drive lifecycle tracking (new/existing/worsened/resolved).
    # NOTE(review): every loaded entry is treated as "previous", including ones
    # that may already have been stored as resolved -- confirm this is intended.
    prev_findings = _load_previous_findings(project_dir, maps_dir_override)
    prev_by_id = {f.finding_id: f for f in prev_findings}

    # Pattern detectors, executed in this fixed order:
    #   1. architecture_cycle        2. state_ownership_conflict
    #   3. schema_drift_risk         4. runtime_config_risk
    #   5. write_authority_violation
    detectors = (
        _find_architecture_cycles,
        _find_state_ownership_conflicts,
        _find_schema_drift_risks,
        _find_runtime_config_risks,
        _find_write_authority_violations,
    )
    current_findings: list[Finding] = []
    for detect in detectors:
        current_findings.extend(detect(repo_maps, prev_by_id))

    # Anything known from the previous run that did not fire again is resolved.
    current_ids = {f.finding_id for f in current_findings}
    current_findings.extend(
        _mark_resolved(prev_finding)
        for prev_id, prev_finding in prev_by_id.items()
        if prev_id not in current_ids
    )

    # Tally lifecycle states once; "worsened" is reported as well so the
    # per-status numbers in the log line add up to the total.
    status_counts: dict[str, int] = {}
    for f in current_findings:
        status_counts[f.finding_status] = status_counts.get(f.finding_status, 0) + 1

    _log.info(
        "build_findings_map: synthesized %d findings "
        "(%d new, %d existing, %d worsened, %d resolved)",
        len(current_findings),
        status_counts.get("new", 0),
        status_counts.get("existing", 0),
        status_counts.get("worsened", 0),
        status_counts.get("resolved", 0),
    )

    return current_findings
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _load_previous_findings(project_dir: Path, maps_dir_override: Path | None = None) -> list[Finding]:
|
|
95
|
+
"""Load previous findings map if exists.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
project_dir: Absolute path to the target project root.
|
|
99
|
+
maps_dir_override: Optional override for maps directory (for --output-dir).
|
|
100
|
+
"""
|
|
101
|
+
if maps_dir_override is not None:
|
|
102
|
+
mdir = maps_dir_override.resolve()
|
|
103
|
+
else:
|
|
104
|
+
mdir = maps_dir(project_dir)
|
|
105
|
+
findings_path = mdir / "80_findings_map.json"
|
|
106
|
+
|
|
107
|
+
if not findings_path.exists():
|
|
108
|
+
return []
|
|
109
|
+
|
|
110
|
+
try:
|
|
111
|
+
import json
|
|
112
|
+
content = findings_path.read_text(encoding="utf-8")
|
|
113
|
+
payload = json.loads(content)
|
|
114
|
+
except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
|
|
115
|
+
_log.warning("_load_previous_findings: failed to read %s: %s", findings_path, exc)
|
|
116
|
+
return []
|
|
117
|
+
|
|
118
|
+
entries_raw = payload.get("entries", [])
|
|
119
|
+
findings: list[Finding] = []
|
|
120
|
+
for i, raw_entry in enumerate(entries_raw):
|
|
121
|
+
try:
|
|
122
|
+
finding = Finding.from_dict(raw_entry)
|
|
123
|
+
findings.append(finding)
|
|
124
|
+
except (KeyError, TypeError, ValueError) as exc:
|
|
125
|
+
_log.debug("_load_previous_findings: skipping entry %d: %s", i, exc)
|
|
126
|
+
|
|
127
|
+
_log.debug("_load_previous_findings: loaded %d previous findings", len(findings))
|
|
128
|
+
return findings
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _make_finding_id(category: str, subject: str, details: str) -> str:
|
|
132
|
+
"""Generate stable finding_id from category, subject, and details."""
|
|
133
|
+
content = f"{category}:{subject}:{details}"
|
|
134
|
+
hash_val = hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
|
|
135
|
+
return f"{category}_{hash_val}"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _mark_resolved(finding: Finding) -> Finding:
    """Return a new Finding equal to ``finding`` but with status ``"resolved"``.

    Every listed field is carried over unchanged; only ``finding_status`` is
    replaced. The input object is not modified.
    """
    carried_fields = (
        "finding_id",
        "category",
        "title",
        "severity",
        "confidence",
        "why_it_matters",
        "suggested_fix",
        "affected_files",
        "evidence",
        "source_maps",
        "source",
        "freshness",
        "status",
    )
    carried = {field_name: getattr(finding, field_name) for field_name in carried_fields}
    return Finding(finding_status="resolved", **carried)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _get_lifecycle_status(
|
|
159
|
+
finding_id: str,
|
|
160
|
+
severity: str,
|
|
161
|
+
prev_by_id: dict[str, Finding],
|
|
162
|
+
) -> str:
|
|
163
|
+
"""Determine lifecycle status (new/existing/worsened)."""
|
|
164
|
+
if finding_id not in prev_by_id:
|
|
165
|
+
return "new"
|
|
166
|
+
prev = prev_by_id[finding_id]
|
|
167
|
+
if prev.severity == severity:
|
|
168
|
+
return "existing"
|
|
169
|
+
# Check if severity worsened (critical > high > medium > low)
|
|
170
|
+
_severity_level = {"critical": 4, "high": 3, "medium": 2, "low": 1}
|
|
171
|
+
curr_level = _severity_level.get(severity, 0)
|
|
172
|
+
prev_level = _severity_level.get(prev.severity, 0)
|
|
173
|
+
return "worsened" if curr_level > prev_level else "existing"
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _find_architecture_cycles(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find architecture_cycle findings: SCC + fan_in >= 5 + hotspot score >= 60.

    Trigger: ConflictEntry with domain == "structural_cycles"
    AND cluster_max_fan_in >= 5 in any source entry
    AND a HotspotEntry for any SCC member with hotspot_score >= 60.

    Args:
        repo_maps: Built maps; ``hotspot`` and ``conflict`` are read here.
        prev_by_id: Findings of the previous run keyed by finding_id, used to
            derive the lifecycle status.

    Returns:
        list[Finding]: One finding per qualifying cycle conflict.
    """
    findings: list[Finding] = []

    # Hotspot index: file -> HotspotEntry (last entry wins on duplicate targets).
    hotspot_by_file: dict[str, Any] = {
        getattr(h, "target", ""): h for h in repo_maps.hotspot
    }

    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "structural_cycles":
            continue

        # Sources may be JSON strings or already-decoded objects; only dicts are usable.
        sources_parsed: list[dict] = []
        for src_raw in conflict.sources:
            try:
                src = json.loads(src_raw) if isinstance(src_raw, str) else src_raw
            except (json.JSONDecodeError, TypeError) as exc:
                # Best-effort: a malformed source is skipped, but leave a trace.
                _log.debug(
                    "_find_architecture_cycles: skipping malformed source in conflict %s: %s",
                    getattr(conflict, "conflict_id", ""),
                    exc,
                )
                continue
            if isinstance(src, dict):
                sources_parsed.append(src)

        # Fan-in threshold across all sources. Non-numeric values (e.g. JSON
        # null) are ignored so they cannot break max() or the comparison below.
        fan_in_values = [
            value
            for value in (s.get("cluster_max_fan_in", 0) for s in sources_parsed)
            if isinstance(value, (int, float))
        ]
        max_fan_in = max(fan_in_values, default=0)
        if max_fan_in < 5:
            continue

        # SCC member files named by the sources.
        scc_files = [s.get("file", "") for s in sources_parsed if s.get("file")]

        # Hotspot entries of SCC members with score >= 60.
        hot_entries = [
            hotspot_by_file[f]
            for f in scc_files
            if f in hotspot_by_file and getattr(hotspot_by_file[f], "hotspot_score", 0) >= 60
        ]
        if not hot_entries:
            continue

        # Evidence: one conflict-map item per source, then one item per hot entry.
        # Each item is stored as a JSON string with sorted keys.
        evidence_items: list[str] = []
        for src in sources_parsed:
            ev = EvidenceItem(
                kind="map_entry",
                map="conflict_map",
                entry_id=getattr(conflict, "conflict_id", ""),
                file=src.get("file", ""),
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        for h in hot_entries:
            ev = EvidenceItem(
                kind="map_entry",
                map="hotspot_map",
                entry_id=getattr(h, "target", ""),
                file=getattr(h, "target", ""),
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        # The id is derived from the conflict subject and the observed fan-in.
        subject = conflict.subject
        details = f"fan_in={max_fan_in}"
        finding_id = _make_finding_id("architecture_cycle", subject, details)

        # Severity: critical if cluster is all-production (conflict severity == "high"),
        # otherwise high
        severity = "critical" if conflict.severity == "high" else "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        scc_size = len(scc_files)
        suggested_fix = (
            f"Decouple cycle by introducing an abstraction layer or moving shared "
            f"symbols to a common base module (cycle cluster size: {scc_size} files)"
        )

        finding = Finding(
            finding_id=finding_id,
            category="architecture_cycle",
            title=f"Import cycle cluster with high fan-in ({max_fan_in}) and hotspot overlap",
            severity=severity,
            confidence=min(conflict.confidence, 0.9),  # confidence is capped at 0.9
            why_it_matters=(
                "Circular imports with high fan-in create tight coupling that blocks "
                "independent testing, deployment, and refactoring of affected modules."
            ),
            suggested_fix=suggested_fix,
            affected_files=tuple(sorted(set(scc_files))),
            evidence=tuple(evidence_items),
            source_maps=("structural_map", "conflict_map", "hotspot_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_architecture_cycles: %d findings", len(findings))
    return findings
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _find_state_ownership_conflicts(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find state_ownership_conflict: shared_write + runtime node + 2+ production writers.

    Trigger: ConflictEntry with action == "investigate_shared_write"
    AND at least 2 distinct source files with file_role == "production"
    (a missing file_role is treated as production)
    AND a RuntimeNode with defined_in matching the conflict subject (target file).

    Args:
        repo_maps: Built maps; ``runtime`` and ``conflict`` are read here.
        prev_by_id: Findings of the previous run keyed by finding_id, used to
            derive the lifecycle status.

    Returns:
        list[Finding]: One finding per qualifying shared-write conflict.
    """
    findings: list[Finding] = []

    # Index runtime nodes by defined_in file (last node wins per file).
    runtime_by_file: dict[str, Any] = {}
    for node in repo_maps.runtime:
        f = getattr(node, "defined_in", "")
        if f:
            runtime_by_file[f] = node

    for conflict in repo_maps.conflict:
        if getattr(conflict, "action", "") != "investigate_shared_write":
            continue

        # Sources may be JSON strings or already-decoded objects; only dicts are usable.
        sources_parsed: list[dict] = []
        for src_raw in conflict.sources:
            try:
                src = json.loads(src_raw) if isinstance(src_raw, str) else src_raw
            except (json.JSONDecodeError, TypeError) as exc:
                # Best-effort: a malformed source is skipped, but leave a trace.
                _log.debug(
                    "_find_state_ownership_conflicts: skipping malformed source in conflict %s: %s",
                    getattr(conflict, "conflict_id", ""),
                    exc,
                )
                continue
            if isinstance(src, dict):
                sources_parsed.append(src)

        # Production writer entries (one per source entry, may repeat a file).
        production_writers = [
            s.get("file", "")
            for s in sources_parsed
            if s.get("file_role", "production") == "production" and s.get("file")
        ]
        # The pattern requires two or more *modules*: several source entries
        # from the same file must not satisfy the threshold on their own.
        distinct_writers = set(production_writers)
        if len(distinct_writers) < 2:
            continue

        # Target file is the conflict subject
        target_file = conflict.subject

        # A RuntimeNode must exist for the target file.
        runtime_node = runtime_by_file.get(target_file)
        if runtime_node is None:
            continue

        # Evidence: one item per distinct writer, then the conflict, then the
        # runtime node. Each item is stored as a JSON string with sorted keys.
        evidence_items: list[str] = []
        for writer_file in sorted(distinct_writers):
            ev = EvidenceItem(
                kind="source_location",
                file=writer_file,
                map="authority_map",
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        ev_conflict = EvidenceItem(
            kind="map_entry",
            map="conflict_map",
            entry_id=getattr(conflict, "conflict_id", ""),
            file=target_file,
        )
        evidence_items.append(json.dumps(ev_conflict.to_dict(), sort_keys=True))

        ev_runtime = EvidenceItem(
            kind="map_entry",
            map="runtime_map",
            entry_id=getattr(runtime_node, "node", ""),
            file=getattr(runtime_node, "defined_in", ""),
        )
        evidence_items.append(json.dumps(ev_runtime.to_dict(), sort_keys=True))

        # The id detail keeps counting source entries (not distinct files) so
        # ids stay identical to the ones produced before the threshold fix.
        details = f"writers={len(production_writers)}"
        finding_id = _make_finding_id("state_ownership_conflict", target_file, details)
        severity = "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        finding = Finding(
            finding_id=finding_id,
            category="state_ownership_conflict",
            title=f"Multiple production modules write to shared target: {target_file}",
            severity=severity,
            confidence=min(conflict.confidence, 0.85),  # confidence is capped at 0.85
            why_it_matters=(
                "Shared write access without a single owner leads to race conditions, "
                "inconsistent state, and undefined ordering of writes at runtime."
            ),
            suggested_fix=(
                f"Designate single owner for {target_file} and route all writes "
                "through that module; demote remaining writers to readers."
            ),
            affected_files=tuple(sorted(distinct_writers | {target_file})),
            evidence=tuple(evidence_items),
            source_maps=("authority_map", "conflict_map", "runtime_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_state_ownership_conflicts: %d findings", len(findings))
    return findings
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _find_schema_drift_risks(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Synthesize schema_drift_risk findings.

    A finding is produced for each ConflictEntry whose domain is
    "contract_drift", provided a DataContractEntry exists whose entity equals
    the conflict subject and that entry lists at least two readers.

    Args:
        repo_maps: Built maps; ``data_contract`` and ``conflict`` are read here.
        prev_by_id: Findings of the previous run keyed by finding_id, used to
            derive the lifecycle status.

    Returns:
        list[Finding]: One medium-severity finding per qualifying conflict.
    """
    # Entity name -> contract entry; unnamed entries are left out and a later
    # entry replaces an earlier one with the same name.
    contracts: dict[str, Any] = {
        name: entry
        for entry in repo_maps.data_contract
        if (name := getattr(entry, "entity", ""))
    }

    results: list[Finding] = []
    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "contract_drift":
            continue

        entity = conflict.subject
        contract = contracts.get(entity)
        if contract is None:
            continue

        readers = getattr(contract, "readers", ())
        reader_count = len(readers)
        if reader_count < 2:
            continue
        ordered_readers = sorted(readers)

        # Evidence: the conflict entry first, then every reader in sorted order.
        # Each item is stored as a JSON string with sorted keys.
        conflict_evidence = EvidenceItem(
            kind="map_entry",
            map="conflict_map",
            entry_id=getattr(conflict, "conflict_id", ""),
            path=entity,
        )
        evidence: list[str] = [json.dumps(conflict_evidence.to_dict(), sort_keys=True)]
        evidence.extend(
            json.dumps(
                EvidenceItem(
                    kind="source_location",
                    file=reader_file,
                    map="data_contract_map",
                ).to_dict(),
                sort_keys=True,
            )
            for reader_file in ordered_readers
        )

        severity = "medium"
        finding_id = _make_finding_id("schema_drift_risk", entity, f"readers={reader_count}")
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        # At most three drift flags are quoted in the explanation text.
        flags = list(getattr(contract, "drift_flags", ()))
        if flags:
            drift_summary = ", ".join(flags[:3])
        else:
            drift_summary = "schema inconsistency"

        results.append(
            Finding(
                finding_id=finding_id,
                category="schema_drift_risk",
                title=f"Schema drift risk in entity '{entity}' with {reader_count} readers",
                severity=severity,
                confidence=min(conflict.confidence, 0.8),  # confidence is capped at 0.8
                why_it_matters=(
                    f"Schema drift ({drift_summary}) with multiple readers means "
                    "consumers may silently receive stale or incompatible data shapes."
                ),
                suggested_fix=(
                    "Align schema variants or add a migration step; pin all readers "
                    "to the canonical schema and remove divergent variants."
                ),
                affected_files=tuple(ordered_readers),
                evidence=tuple(evidence),
                source_maps=("data_contract_map", "conflict_map"),
                finding_status=lifecycle,
                source="synthesis",
                freshness="",
                status="validated",
            )
        )

    _log.debug("_find_schema_drift_risks: %d findings", len(results))
    return results
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _find_runtime_config_risks(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Synthesize runtime_config_risk findings.

    For each ConflictEntry whose domain is "runtime_env_coupling", the
    ``env_vars`` lists of its sources are collected. Every collected variable
    that is not the entity name of any DataContractEntry yields its own finding.

    Args:
        repo_maps: Built maps; ``data_contract`` and ``conflict`` are read here.
        prev_by_id: Findings of the previous run keyed by finding_id, used to
            derive the lifecycle status.

    Returns:
        list[Finding]: One finding per (conflict, undocumented env var) pair.
    """
    # Entity names of the data_contract map stand in for "documented" env vars.
    documented: set[str] = {getattr(c, "entity", "") for c in repo_maps.data_contract} - {""}

    secret_markers = ("KEY", "SECRET", "TOKEN", "PASSWORD", "PASS")
    results: list[Finding] = []

    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "runtime_env_coupling":
            continue

        collected_vars: list[str] = []
        node_name: str = conflict.subject
        defined_in: str = ""
        for raw_source in conflict.sources:
            try:
                if isinstance(raw_source, str):
                    source = json.loads(raw_source)
                else:
                    source = raw_source
                if isinstance(source, dict):
                    collected_vars.extend(source.get("env_vars", []))
                    # The first non-empty value seen is kept for both fields.
                    defined_in = defined_in or source.get("defined_in", "")
                    node_name = node_name or source.get("node", "")
            except (json.JSONDecodeError, TypeError):
                pass

        if not collected_vars:
            continue

        # Distinct variables with no matching contract entity, in sorted order.
        undocumented = sorted({name for name in collected_vars if name not in documented})
        if not undocumented:
            continue

        # One finding per undocumented variable.
        for env_var in undocumented:
            conflict_evidence = EvidenceItem(
                kind="map_entry",
                map="conflict_map",
                entry_id=getattr(conflict, "conflict_id", ""),
                path=env_var,
            )
            evidence: list[str] = [json.dumps(conflict_evidence.to_dict(), sort_keys=True)]

            if defined_in:
                location_evidence = EvidenceItem(
                    kind="source_location",
                    file=defined_in,
                    map="runtime_map",
                )
                evidence.append(json.dumps(location_evidence.to_dict(), sort_keys=True))

            finding_id = _make_finding_id("runtime_config_risk", node_name, f"env_var={env_var}")

            # Names containing a secret-like marker rate medium, all others low.
            upper_name = env_var.upper()
            if any(marker in upper_name for marker in secret_markers):
                severity = "medium"
            else:
                severity = "low"
            lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

            affected_files = (defined_in,) if defined_in else ()

            results.append(
                Finding(
                    finding_id=finding_id,
                    category="runtime_config_risk",
                    title=f"Undocumented env var '{env_var}' coupled to runtime node '{node_name}'",
                    severity=severity,
                    confidence=min(conflict.confidence, 0.75),  # confidence is capped at 0.75
                    why_it_matters=(
                        f"Env var '{env_var}' is consumed at runtime but absent from "
                        "any data contract, making its presence, type, and defaults invisible "
                        "to operators and static analysis."
                    ),
                    suggested_fix=(
                        f"Document env var '{env_var}' in a data contract entity "
                        "or remove the runtime coupling if the var is no longer needed."
                    ),
                    affected_files=affected_files,
                    evidence=tuple(evidence),
                    source_maps=("runtime_map", "conflict_map", "data_contract_map"),
                    finding_status=lifecycle,
                    source="synthesis",
                    freshness="",
                    status="validated",
                )
            )

    _log.debug("_find_runtime_config_risks: %d findings", len(results))
    return results
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _resolve_path_constructor_target(writers_detected: Any, writer_file: str) -> str:
    """Return the write target of the first path_constructor illegal write by *writer_file*.

    Each item of *writers_detected* may be a JSON string or an already-decoded
    object; items that fail to decode or are not dicts are ignored.

    Args:
        writers_detected: Iterable of writer entries taken from an authority domain.
        writer_file: The file the conflict entry names as the illegal writer.

    Returns:
        The entry's ``"target"`` value, or *writer_file* when the target is
        missing or empty.  An empty string when no entry matches.
    """
    for writer_raw in writers_detected:
        try:
            writer = json.loads(writer_raw) if isinstance(writer_raw, str) else writer_raw
        except (json.JSONDecodeError, TypeError):
            continue
        if not isinstance(writer, dict):
            continue

        # Match by file, location, or target key — the first non-empty one wins.
        wfile = (
            writer.get("file", "")
            or writer.get("location", "")
            or writer.get("target", "")
        )
        if wfile != writer_file:
            continue
        if writer.get("kind") != "illegal_write":
            continue
        if writer.get("provenance") == "path_constructor":
            # `dict.get(key, default)` only applies the default when the key is
            # absent; use `or` so an empty/None target also falls back to the
            # writer file instead of silently suppressing the finding.
            return writer.get("target") or writer_file

    return ""


def _find_write_authority_violations(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find write_authority_violation: illegal_write + path_constructor provenance.

    Trigger: ConflictEntry with action == "investigate_illegal_write" (domain in authority map)
    AND in the corresponding AuthorityDomain, the illegal writer's detected entry
    has provenance == "path_constructor", meaning the write target is statically
    verifiable (not a dynamic parameter).

    Args:
        repo_maps: Source of the ``authority`` and ``conflict`` entries to correlate.
        prev_by_id: Earlier findings keyed by finding id; passed to
            ``_get_lifecycle_status`` to derive each finding's lifecycle status.

    Returns:
        One ``Finding`` (severity "high") per conflict entry that satisfies the
        trigger, in the order the conflicts are iterated.
    """
    findings: list[Finding] = []

    # Build authority domain index: authority_domain name -> AuthorityDomain.
    # Entries without a name are skipped; a later duplicate name replaces an earlier one.
    authority_by_domain: dict[str, Any] = {
        name: domain
        for domain in repo_maps.authority
        if (name := getattr(domain, "authority_domain", ""))
    }

    for conflict in repo_maps.conflict:
        if getattr(conflict, "action", "") != "investigate_illegal_write":
            continue

        # The conflict subject is the illegal writer file; domain is the authority domain name
        # (may have ":structural" suffix from _check_authority_vs_structural — strip it)
        raw_domain = getattr(conflict, "domain", "")
        authority_domain_name = raw_domain.removesuffix(":structural")
        writer_file = conflict.subject

        auth_domain = authority_by_domain.get(authority_domain_name)
        if auth_domain is None:
            continue

        # Only conflicts backed by a path_constructor illegal write qualify.
        path_constructor_target = _resolve_path_constructor_target(
            auth_domain.writers_detected, writer_file
        )
        if not path_constructor_target:
            continue

        # Build evidence: the writer location, the target path, and the conflict entry.
        ev_writer = EvidenceItem(
            kind="source_location",
            file=writer_file,
            map="authority_map",
        )
        ev_target = EvidenceItem(
            kind="target_path",
            path=path_constructor_target,
            map="authority_map",
        )
        ev_conflict = EvidenceItem(
            kind="map_entry",
            map="conflict_map",
            entry_id=getattr(conflict, "conflict_id", ""),
            file=writer_file,
        )
        evidence_items: list[str] = [
            json.dumps(item.to_dict(), sort_keys=True)
            for item in (ev_writer, ev_target, ev_conflict)
        ]

        details = f"target={path_constructor_target}"
        finding_id = _make_finding_id("write_authority_violation", writer_file, details)
        severity = "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        canonical_owner = getattr(auth_domain, "canonical_owner", authority_domain_name)

        finding = Finding(
            finding_id=finding_id,
            category="write_authority_violation",
            title=(
                f"Illegal write by '{writer_file}' to path_constructor target "
                f"in domain '{authority_domain_name}'"
            ),
            severity=severity,
            # Confidence is capped at 0.9 regardless of the conflict's own value.
            confidence=min(conflict.confidence, 0.9),
            why_it_matters=(
                f"The file writes to a statically-verifiable path ({path_constructor_target}) "
                f"that belongs to domain '{authority_domain_name}' (canonical owner: "
                f"{canonical_owner}), bypassing authority controls."
            ),
            suggested_fix=(
                f"Add '{writer_file}' to allowed_writers for domain "
                f"'{authority_domain_name}', or refactor writes through the canonical "
                f"owner ({canonical_owner})."
            ),
            # Set-then-sort de-duplicates when the target equals the writer file.
            affected_files=tuple(sorted({writer_file, path_constructor_target})),
            evidence=tuple(evidence_items),
            source_maps=("authority_map", "conflict_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_write_authority_violations: %d findings", len(findings))
    return findings
|