vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,716 @@
1
+ """Findings builder -- synthesizes map entries into diagnostic findings.
2
+
3
+ Map 8: Findings synthesizes from all 7 maps (structural, data_contract, authority,
4
+ runtime, conflict, hotspot, refactor_boundary) and produces actionable findings
5
+ for operators.
6
+
7
+ Patterns:
8
+ - architecture_cycle: SCC cluster + fan_in >= 5 + hotspot score >= 60
9
+ - state_ownership_conflict: shared_write conflict + runtime node + 2+ production modules
10
+ - schema_drift_risk: contract_drift + multiple readers >= 2
11
+ - runtime_config_risk: env_coupling conflict + env_var not in contract
12
+ - write_authority_violation: illegal_write + verified target (path_constructor provenance)
13
+
14
+ Lifecycle:
15
+ - new: first time seeing this finding_id
16
+ - existing: same finding_id, same or lower severity
17
+ - worsened: same finding_id, severity increased
18
+ - resolved: previous finding_id not in current output
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import hashlib
23
+ import json
24
+ import logging
25
+ from pathlib import Path
26
+ from typing import Any
27
+
28
+ from .map_models import RepoMaps
29
+ from .map_models_findings import Finding, EvidenceItem
30
+ from .map_storage import maps_dir
31
+
32
+ __all__ = ["build_findings_map"]
33
+
34
+ _log = logging.getLogger(__name__)
35
+
36
+
37
def build_findings_map(
    project_dir: Path,
    repo_maps: RepoMaps,
    maps_dir_override: Path | None = None,
) -> list[Finding]:
    """Build the findings map by running every synthesis pattern over *repo_maps*.

    The findings written by the previous run are loaded first so that each
    current finding can be tagged new/existing/worsened, and so that findings
    which no longer fire are reported as resolved.

    Only previously *active* findings take part in lifecycle tracking. A
    finding that was already "resolved" in the previous run is not emitted
    again (otherwise it would be repeated on every later run), and if it
    fires again it is classified as "new" rather than "existing".

    Args:
        project_dir: Path to the target project root (resolved here).
        repo_maps: RepoMaps object containing all built maps.
        maps_dir_override: Optional override for maps directory (for --output-dir).

    Returns:
        list[Finding]: Current findings in pattern order, followed by one
        "resolved" entry per previously active finding that is absent now.
    """
    project_dir = Path(project_dir).resolve()
    _log.info("build_findings_map: starting for %s", project_dir)

    # Previous findings drive lifecycle tracking; already-resolved ones are
    # history, not a baseline to compare against.
    prev_findings = _load_previous_findings(project_dir, maps_dir_override)
    prev_by_id = {
        f.finding_id: f for f in prev_findings if f.finding_status != "resolved"
    }

    # Patterns 1-5, in the order they are documented in the module docstring.
    detectors = (
        _find_architecture_cycles,
        _find_state_ownership_conflicts,
        _find_schema_drift_risks,
        _find_runtime_config_risks,
        _find_write_authority_violations,
    )
    current_findings: list[Finding] = []
    for detect in detectors:
        current_findings.extend(detect(repo_maps, prev_by_id))

    # Anything that was active last time but did not fire now is resolved.
    current_ids = {f.finding_id for f in current_findings}
    current_findings.extend(
        _mark_resolved(prev_finding)
        for prev_id, prev_finding in prev_by_id.items()
        if prev_id not in current_ids
    )

    # Tally every lifecycle state so the logged breakdown adds up to the total.
    status_counts: dict[str, int] = {}
    for f in current_findings:
        status_counts[f.finding_status] = status_counts.get(f.finding_status, 0) + 1

    _log.info(
        "build_findings_map: synthesized %d findings "
        "(%d new, %d existing, %d worsened, %d resolved)",
        len(current_findings),
        status_counts.get("new", 0),
        status_counts.get("existing", 0),
        status_counts.get("worsened", 0),
        status_counts.get("resolved", 0),
    )

    return current_findings
92
+
93
+
94
def _load_previous_findings(project_dir: Path, maps_dir_override: Path | None = None) -> list[Finding]:
    """Load the findings map written by a previous run, if one exists.

    Loading is best-effort: a missing, unreadable, malformed, or unexpectedly
    shaped file yields an empty list instead of raising, so a damaged previous
    map never blocks a new build. Individual entries that cannot be converted
    are skipped.

    Args:
        project_dir: Absolute path to the target project root.
        maps_dir_override: Optional override for maps directory (for --output-dir).

    Returns:
        list[Finding]: The previous findings that could be parsed, in file order.
    """
    if maps_dir_override is not None:
        mdir = maps_dir_override.resolve()
    else:
        mdir = maps_dir(project_dir)
    findings_path = mdir / "80_findings_map.json"

    if not findings_path.exists():
        return []

    try:
        payload = json.loads(findings_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        _log.warning("_load_previous_findings: failed to read %s: %s", findings_path, exc)
        return []

    # Valid JSON is not necessarily the expected {"entries": [...]} object;
    # a top-level list or a null "entries" must not crash the build.
    entries_raw = payload.get("entries", []) if isinstance(payload, dict) else None
    if not isinstance(entries_raw, list):
        _log.warning(
            "_load_previous_findings: unexpected payload shape in %s; ignoring previous findings",
            findings_path,
        )
        return []

    findings: list[Finding] = []
    for i, raw_entry in enumerate(entries_raw):
        if not isinstance(raw_entry, dict):
            _log.debug(
                "_load_previous_findings: skipping entry %d: expected object, got %s",
                i,
                type(raw_entry).__name__,
            )
            continue
        try:
            findings.append(Finding.from_dict(raw_entry))
        except (KeyError, TypeError, ValueError) as exc:
            _log.debug("_load_previous_findings: skipping entry %d: %s", i, exc)

    _log.debug("_load_previous_findings: loaded %d previous findings", len(findings))
    return findings
129
+
130
+
131
def _make_finding_id(category: str, subject: str, details: str) -> str:
    """Derive a deterministic finding_id from category, subject, and details.

    The three parts are joined with ':' and hashed with SHA-256; the id is the
    category, an underscore, and the first 16 hex characters of the digest.
    """
    fingerprint = "{}:{}:{}".format(category, subject, details)
    digest = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    return "{}_{}".format(category, digest[:16])
136
+
137
+
138
def _mark_resolved(finding: Finding) -> Finding:
    """Return a new Finding equal to *finding* except finding_status="resolved".

    The input object is not modified; every other field is carried over as-is.
    """
    carried_over = (
        "finding_id",
        "category",
        "title",
        "severity",
        "confidence",
        "why_it_matters",
        "suggested_fix",
        "affected_files",
        "evidence",
        "source_maps",
        "source",
        "freshness",
        "status",
    )
    fields = {name: getattr(finding, name) for name in carried_over}
    fields["finding_status"] = "resolved"
    return Finding(**fields)
156
+
157
+
158
def _get_lifecycle_status(
    finding_id: str,
    severity: str,
    prev_by_id: dict[str, Finding],
) -> str:
    """Classify a current finding against the previous run.

    Returns "new" when finding_id is not in prev_by_id, "worsened" when the
    severity ranks strictly higher than the previous one, and "existing"
    otherwise (unchanged or lower severity). Ranking is
    critical > high > medium > low; any other value ranks below "low".
    """
    try:
        previous = prev_by_id[finding_id]
    except KeyError:
        return "new"

    rank = {"low": 1, "medium": 2, "high": 3, "critical": 4}
    escalated = (
        previous.severity != severity
        and rank.get(severity, 0) > rank.get(previous.severity, 0)
    )
    if escalated:
        return "worsened"
    return "existing"
174
+
175
+
176
def _find_architecture_cycles(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find architecture_cycle findings: SCC + fan_in >= 5 + hotspot score >= 60.

    Trigger: ConflictEntry with domain == "structural_cycles"
    AND cluster_max_fan_in >= 5 in any source entry
    AND a HotspotEntry for any SCC member with hotspot_score >= 60.

    Source entries may be JSON strings or already-decoded dicts; entries that
    cannot be decoded to a dict are ignored, as are non-numeric
    cluster_max_fan_in values.

    Args:
        repo_maps: Built maps; only ``hotspot`` and ``conflict`` are read here.
        prev_by_id: Previous findings keyed by finding_id, for lifecycle status.

    Returns:
        list[Finding]: One finding per qualifying structural-cycle conflict.
    """
    findings: list[Finding] = []

    # Build hotspot index: file -> HotspotEntry
    hotspot_by_file: dict[str, Any] = {}
    for h in repo_maps.hotspot:
        hotspot_by_file[getattr(h, "target", "")] = h

    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "structural_cycles":
            continue

        # Parse sources to collect SCC member files and cluster_max_fan_in
        sources_parsed: list[dict] = []
        for src_raw in conflict.sources:
            try:
                src = json.loads(src_raw) if isinstance(src_raw, str) else src_raw
            except (json.JSONDecodeError, TypeError):
                continue
            if isinstance(src, dict):
                sources_parsed.append(src)

        # Check fan_in threshold across all sources. Non-numeric values are
        # dropped so that one malformed source cannot make max() raise.
        fan_in_values = [
            value
            for value in (s.get("cluster_max_fan_in", 0) for s in sources_parsed)
            if isinstance(value, (int, float))
        ]
        max_fan_in = max(fan_in_values, default=0)
        if max_fan_in < 5:
            continue

        # Collect SCC member files from sources (may repeat across sources)
        scc_files = [s.get("file", "") for s in sources_parsed if s.get("file")]
        unique_scc_files = sorted(set(scc_files))

        # Find hotspot entries for any SCC member with score >= 60
        hot_entries = [
            hotspot_by_file[f]
            for f in scc_files
            if f in hotspot_by_file and getattr(hotspot_by_file[f], "hotspot_score", 0) >= 60
        ]
        if not hot_entries:
            continue

        # Construct evidence: one item per conflict source, then one per hotspot
        evidence_items: list[str] = []
        for src in sources_parsed:
            ev = EvidenceItem(
                kind="map_entry",
                map="conflict_map",
                entry_id=getattr(conflict, "conflict_id", ""),
                file=src.get("file", ""),
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        for h in hot_entries:
            ev = EvidenceItem(
                kind="map_entry",
                map="hotspot_map",
                entry_id=getattr(h, "target", ""),
                file=getattr(h, "target", ""),
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        # The finding_id is derived from the conflict subject and the fan-in
        subject = conflict.subject
        details = f"fan_in={max_fan_in}"
        finding_id = _make_finding_id("architecture_cycle", subject, details)

        # Severity: critical if cluster is all-production (conflict severity == "high"),
        # otherwise high
        severity = "critical" if conflict.severity == "high" else "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        # Report the number of distinct files so the size matches affected_files
        # even when several sources name the same file.
        scc_size = len(unique_scc_files)
        suggested_fix = (
            f"Decouple cycle by introducing an abstraction layer or moving shared "
            f"symbols to a common base module (cycle cluster size: {scc_size} files)"
        )

        finding = Finding(
            finding_id=finding_id,
            category="architecture_cycle",
            title=f"Import cycle cluster with high fan-in ({max_fan_in}) and hotspot overlap",
            severity=severity,
            confidence=min(conflict.confidence, 0.9),
            why_it_matters=(
                "Circular imports with high fan-in create tight coupling that blocks "
                "independent testing, deployment, and refactoring of affected modules."
            ),
            suggested_fix=suggested_fix,
            affected_files=tuple(unique_scc_files),
            evidence=tuple(evidence_items),
            source_maps=("structural_map", "conflict_map", "hotspot_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_architecture_cycles: %d findings", len(findings))
    return findings
287
+
288
+
289
def _find_state_ownership_conflicts(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find state_ownership_conflict: shared_write + runtime node + 2+ production writers.

    Trigger: ConflictEntry with action == "investigate_shared_write"
    AND at least 2 *distinct* writer files whose file_role is "production"
    (a source without a file_role is treated as production)
    AND a RuntimeNode with defined_in matching the conflict subject (target file).

    Writers are counted per file, not per source entry, so a single module
    that appears in several sources does not count as multiple writers.

    Args:
        repo_maps: Built maps; only ``runtime`` and ``conflict`` are read here.
        prev_by_id: Previous findings keyed by finding_id, for lifecycle status.

    Returns:
        list[Finding]: One finding per qualifying shared-write conflict.
    """
    findings: list[Finding] = []

    # Index runtime nodes by defined_in file (last node wins for a given file)
    runtime_by_file: dict[str, Any] = {}
    for node in repo_maps.runtime:
        f = getattr(node, "defined_in", "")
        if f:
            runtime_by_file[f] = node

    for conflict in repo_maps.conflict:
        if getattr(conflict, "action", "") != "investigate_shared_write":
            continue

        # Parse sources to collect writer files and roles
        sources_parsed: list[dict] = []
        for src_raw in conflict.sources:
            try:
                src = json.loads(src_raw) if isinstance(src_raw, str) else src_raw
            except (json.JSONDecodeError, TypeError):
                continue
            if isinstance(src, dict):
                sources_parsed.append(src)

        # Distinct production writer files, in a deterministic order
        production_writers = sorted(
            {
                s["file"]
                for s in sources_parsed
                if s.get("file") and s.get("file_role", "production") == "production"
            }
        )
        if len(production_writers) < 2:
            continue

        # Target file is the conflict subject
        target_file = conflict.subject

        # Check that a RuntimeNode exists for the target file
        runtime_node = runtime_by_file.get(target_file)
        if runtime_node is None:
            continue

        # Build evidence: each writer, then the conflict entry, then the runtime node
        evidence_items: list[str] = []
        for writer_file in production_writers:
            ev = EvidenceItem(
                kind="source_location",
                file=writer_file,
                map="authority_map",
            )
            evidence_items.append(json.dumps(ev.to_dict(), sort_keys=True))

        ev_conflict = EvidenceItem(
            kind="map_entry",
            map="conflict_map",
            entry_id=getattr(conflict, "conflict_id", ""),
            file=target_file,
        )
        evidence_items.append(json.dumps(ev_conflict.to_dict(), sort_keys=True))

        ev_runtime = EvidenceItem(
            kind="map_entry",
            map="runtime_map",
            entry_id=getattr(runtime_node, "node", ""),
            file=getattr(runtime_node, "defined_in", ""),
        )
        evidence_items.append(json.dumps(ev_runtime.to_dict(), sort_keys=True))

        details = f"writers={len(production_writers)}"
        finding_id = _make_finding_id("state_ownership_conflict", target_file, details)
        severity = "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        finding = Finding(
            finding_id=finding_id,
            category="state_ownership_conflict",
            title=f"Multiple production modules write to shared target: {target_file}",
            severity=severity,
            confidence=min(conflict.confidence, 0.85),
            why_it_matters=(
                "Shared write access without a single owner leads to race conditions, "
                "inconsistent state, and undefined ordering of writes at runtime."
            ),
            suggested_fix=(
                f"Designate single owner for {target_file} and route all writes "
                "through that module; demote remaining writers to readers."
            ),
            affected_files=tuple(sorted(set(production_writers) | {target_file})),
            evidence=tuple(evidence_items),
            source_maps=("authority_map", "conflict_map", "runtime_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_state_ownership_conflicts: %d findings", len(findings))
    return findings
396
+
397
+
398
def _find_schema_drift_risks(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find schema_drift_risk: contract_drift conflict + 2+ readers in data_contract map.

    A finding is produced for each ConflictEntry whose domain is
    "contract_drift" and whose subject names a DataContractEntry (matched on
    its ``entity``) that has at least two readers.
    """

    def _encode(item: Any) -> str:
        # Evidence is stored as canonical (key-sorted) JSON text.
        return json.dumps(item.to_dict(), sort_keys=True)

    # entity name -> DataContractEntry; a later entry replaces an earlier one
    contracts: dict[str, Any] = {}
    for entry in repo_maps.data_contract:
        entry_name = getattr(entry, "entity", "")
        if entry_name:
            contracts[entry_name] = entry

    results: list[Finding] = []
    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "contract_drift":
            continue

        entity = conflict.subject
        contract = contracts.get(entity)
        if contract is None:
            continue

        readers = getattr(contract, "readers", ())
        reader_count = len(readers)
        if reader_count < 2:
            continue
        ordered_readers = sorted(readers)

        # Evidence: the conflict entry first, then every reader location
        evidence = [
            _encode(
                EvidenceItem(
                    kind="map_entry",
                    map="conflict_map",
                    entry_id=getattr(conflict, "conflict_id", ""),
                    path=entity,
                )
            )
        ]
        evidence += [
            _encode(
                EvidenceItem(
                    kind="source_location",
                    file=reader_file,
                    map="data_contract_map",
                )
            )
            for reader_file in ordered_readers
        ]

        severity = "medium"
        finding_id = _make_finding_id(
            "schema_drift_risk", entity, f"readers={reader_count}"
        )
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        # Mention at most three drift flags; fall back to a generic phrase
        flags = list(getattr(contract, "drift_flags", ()))
        if flags:
            drift_summary = ", ".join(flags[:3])
        else:
            drift_summary = "schema inconsistency"

        results.append(
            Finding(
                finding_id=finding_id,
                category="schema_drift_risk",
                title=f"Schema drift risk in entity '{entity}' with {reader_count} readers",
                severity=severity,
                confidence=min(conflict.confidence, 0.8),
                why_it_matters=(
                    f"Schema drift ({drift_summary}) with multiple readers means "
                    "consumers may silently receive stale or incompatible data shapes."
                ),
                suggested_fix=(
                    "Align schema variants or add a migration step; pin all readers "
                    "to the canonical schema and remove divergent variants."
                ),
                affected_files=tuple(ordered_readers),
                evidence=tuple(evidence),
                source_maps=("data_contract_map", "conflict_map"),
                finding_status=lifecycle,
                source="synthesis",
                freshness="",
                status="validated",
            )
        )

    _log.debug("_find_schema_drift_risks: %d findings", len(results))
    return results
483
+
484
+
485
def _find_runtime_config_risks(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find runtime_config_risk: env_coupling conflict + env_var absent from contracts.

    Trigger: ConflictEntry with domain == "runtime_env_coupling" whose sources
    list at least one env var that is not the entity name of any
    DataContractEntry. One finding is emitted per distinct undocumented env
    var, so a conflict with several undocumented vars yields several findings.

    Source entries may be JSON strings or already-decoded dicts; entries that
    cannot be decoded to a dict are ignored. Within a source, ``env_vars`` may
    be a list/tuple of names or a single name; empty and non-string items are
    ignored.

    Args:
        repo_maps: Built maps; only ``data_contract`` and ``conflict`` are read here.
        prev_by_id: Previous findings keyed by finding_id, for lifecycle status.

    Returns:
        list[Finding]: One finding per (conflict, undocumented env var) pair.
    """
    findings: list[Finding] = []

    # Substrings that mark an env var name as likely holding a credential
    secret_markers = ("KEY", "SECRET", "TOKEN", "PASSWORD", "PASS")

    # Collect all entity names from data_contract map (treat as documented env vars)
    contract_entities: set[str] = {
        getattr(c, "entity", "") for c in repo_maps.data_contract
    }
    contract_entities.discard("")

    for conflict in repo_maps.conflict:
        if getattr(conflict, "domain", "") != "runtime_env_coupling":
            continue

        # Parse sources to find env_vars list
        env_vars: list[str] = []
        node_name: str = conflict.subject
        defined_in: str = ""
        for src_raw in conflict.sources:
            try:
                src = json.loads(src_raw) if isinstance(src_raw, str) else src_raw
            except (json.JSONDecodeError, TypeError):
                continue
            if not isinstance(src, dict):
                continue

            raw_vars = src.get("env_vars", [])
            if isinstance(raw_vars, str):
                # A bare string is one variable name, not a sequence of characters
                raw_vars = [raw_vars]
            if isinstance(raw_vars, (list, tuple)):
                env_vars.extend(v for v in raw_vars if isinstance(v, str) and v)

            # First non-empty value wins; the conflict subject takes precedence
            # over any per-source node name.
            defined_in = defined_in or src.get("defined_in", "")
            node_name = node_name or src.get("node", "")

        if not env_vars:
            continue

        # Find env vars not present in any contract entity
        undocumented = [v for v in env_vars if v not in contract_entities]
        if not undocumented:
            continue

        # One finding per undocumented env var, so each var has its own id
        for env_var in sorted(set(undocumented)):
            evidence_items: list[str] = []

            ev_conflict = EvidenceItem(
                kind="map_entry",
                map="conflict_map",
                entry_id=getattr(conflict, "conflict_id", ""),
                path=env_var,
            )
            evidence_items.append(json.dumps(ev_conflict.to_dict(), sort_keys=True))

            if defined_in:
                ev_location = EvidenceItem(
                    kind="source_location",
                    file=defined_in,
                    map="runtime_map",
                )
                evidence_items.append(json.dumps(ev_location.to_dict(), sort_keys=True))

            details = f"env_var={env_var}"
            finding_id = _make_finding_id("runtime_config_risk", node_name, details)

            # Severity: medium when the name contains a credential-like marker,
            # otherwise low.
            upper_name = env_var.upper()
            severity = (
                "medium"
                if any(kw in upper_name for kw in secret_markers)
                else "low"
            )
            lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

            affected: list[str] = [defined_in] if defined_in else []

            finding = Finding(
                finding_id=finding_id,
                category="runtime_config_risk",
                title=f"Undocumented env var '{env_var}' coupled to runtime node '{node_name}'",
                severity=severity,
                confidence=min(conflict.confidence, 0.75),
                why_it_matters=(
                    f"Env var '{env_var}' is consumed at runtime but absent from "
                    "any data contract, making its presence, type, and defaults invisible "
                    "to operators and static analysis."
                ),
                suggested_fix=(
                    f"Document env var '{env_var}' in a data contract entity "
                    "or remove the runtime coupling if the var is no longer needed."
                ),
                affected_files=tuple(affected),
                evidence=tuple(evidence_items),
                source_maps=("runtime_map", "conflict_map", "data_contract_map"),
                finding_status=lifecycle,
                source="synthesis",
                freshness="",
                status="validated",
            )
            findings.append(finding)

    _log.debug("_find_runtime_config_risks: %d findings", len(findings))
    return findings
590
+
591
+
592
def _find_write_authority_violations(
    repo_maps: RepoMaps,
    prev_by_id: dict[str, Finding],
) -> list[Finding]:
    """Find write_authority_violation: illegal_write + path_constructor provenance.

    Trigger: ConflictEntry with action == "investigate_illegal_write" (domain in authority map)
    AND in the corresponding AuthorityDomain, the illegal writer's detected entry
    has provenance == "path_constructor", meaning the write target is statically
    verifiable (not a dynamic parameter).

    The reported target is the matched writer entry's ``target``; when that is
    missing or empty, the writer file itself is used so a matched writer is
    never dropped just because its target field is blank.

    Args:
        repo_maps: Built maps; only ``authority`` and ``conflict`` are read here.
        prev_by_id: Previous findings keyed by finding_id, for lifecycle status.

    Returns:
        list[Finding]: One finding per qualifying illegal-write conflict.
    """
    findings: list[Finding] = []

    # Build authority domain index: authority_domain name -> AuthorityDomain
    authority_by_domain: dict[str, Any] = {}
    for domain in repo_maps.authority:
        name = getattr(domain, "authority_domain", "")
        if name:
            authority_by_domain[name] = domain

    for conflict in repo_maps.conflict:
        if getattr(conflict, "action", "") != "investigate_illegal_write":
            continue

        # The conflict subject is the illegal writer file; domain is the authority domain name
        # (may have ":structural" suffix from _check_authority_vs_structural — strip it)
        raw_domain = getattr(conflict, "domain", "")
        authority_domain_name = raw_domain.removesuffix(":structural")
        writer_file = conflict.subject

        auth_domain = authority_by_domain.get(authority_domain_name)
        if auth_domain is None:
            continue

        # Find the first writer entry in writers_detected that matches
        # writer_file, is an illegal write, and has path_constructor provenance
        path_constructor_target: str = ""
        for writer_raw in auth_domain.writers_detected:
            try:
                writer = json.loads(writer_raw) if isinstance(writer_raw, str) else writer_raw
            except (json.JSONDecodeError, TypeError):
                continue
            if not isinstance(writer, dict):
                continue

            # Match by file, location, or target key (normalise as in conflict_builder)
            wfile = (
                writer.get("file", "")
                or writer.get("location", "")
                or writer.get("target", "")
            )
            if wfile != writer_file:
                continue
            if writer.get("kind") != "illegal_write":
                continue
            if writer.get("provenance") == "path_constructor":
                # `or` (not a .get default) so an empty/None target also
                # falls back to the writer file.
                path_constructor_target = writer.get("target") or writer_file
                break

        if not path_constructor_target:
            continue

        # Build evidence: the writer, the write target, then the conflict entry
        evidence_items: list[str] = []

        ev_writer = EvidenceItem(
            kind="source_location",
            file=writer_file,
            map="authority_map",
        )
        evidence_items.append(json.dumps(ev_writer.to_dict(), sort_keys=True))

        ev_target = EvidenceItem(
            kind="target_path",
            path=path_constructor_target,
            map="authority_map",
        )
        evidence_items.append(json.dumps(ev_target.to_dict(), sort_keys=True))

        ev_conflict = EvidenceItem(
            kind="map_entry",
            map="conflict_map",
            entry_id=getattr(conflict, "conflict_id", ""),
            file=writer_file,
        )
        evidence_items.append(json.dumps(ev_conflict.to_dict(), sort_keys=True))

        details = f"target={path_constructor_target}"
        finding_id = _make_finding_id("write_authority_violation", writer_file, details)
        severity = "high"
        lifecycle = _get_lifecycle_status(finding_id, severity, prev_by_id)

        # Fall back to the domain name when the domain declares no canonical_owner
        canonical_owner = getattr(auth_domain, "canonical_owner", authority_domain_name)

        finding = Finding(
            finding_id=finding_id,
            category="write_authority_violation",
            title=(
                f"Illegal write by '{writer_file}' to path_constructor target "
                f"in domain '{authority_domain_name}'"
            ),
            severity=severity,
            confidence=min(conflict.confidence, 0.9),
            why_it_matters=(
                f"The file writes to a statically-verifiable path ({path_constructor_target}) "
                f"that belongs to domain '{authority_domain_name}' (canonical owner: "
                f"{canonical_owner}), bypassing authority controls."
            ),
            suggested_fix=(
                f"Add '{writer_file}' to allowed_writers for domain "
                f"'{authority_domain_name}', or refactor writes through the canonical "
                f"owner ({canonical_owner})."
            ),
            affected_files=tuple(sorted({writer_file, path_constructor_target})),
            evidence=tuple(evidence_items),
            source_maps=("authority_map", "conflict_map"),
            finding_status=lifecycle,
            source="synthesis",
            freshness="",
            status="validated",
        )
        findings.append(finding)

    _log.debug("_find_write_authority_violations: %d findings", len(findings))
    return findings