vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,818 @@
1
+ """Conflict map builder -- Map 5.
2
+
3
+ Performs pairwise diff between authority, runtime, structural and contract maps
4
+ from an existing RepoMaps container to detect inter-map conflicts.
5
+
6
+ Generic: operates on any RepoMaps, does not assume Vigil project layout.
7
+
8
+ Public API:
9
+ build_conflict_map(repo_maps, previous_conflicts=()) -> list[ConflictEntry]
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ from collections.abc import Sequence
16
+ from datetime import datetime, timezone
17
+
18
+ from .fingerprint import make_conflict_id
19
+ from .map_common import classify_file_role
20
+ from .map_errors import MapBuildConflictBudgetExceeded
21
+ from .map_models import AuthorityDomain, DataContractEntry, RepoMaps, StructuralEntry
22
+ from .map_models_ext import ConflictEntry
23
+
24
# Public surface of this module: only the builder entry point is exported.
__all__ = ["build_conflict_map"]

# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# Maximum number of open conflicts allowed before budget raise.
# build_conflict_map raises MapBuildConflictBudgetExceeded when the number of
# entries whose conflict_status is "open" is strictly greater than this value.
_CONFLICT_BUDGET = 500

# Metadata constants for all generated entries.
# _SOURCE is stored as ConflictEntry.source and _CONFIDENCE as
# ConflictEntry.confidence for every entry built by _apply_lifecycle.
_SOURCE = "inter_map_diff"
_CONFIDENCE = 0.9
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Internal helpers
38
+ # ---------------------------------------------------------------------------
39
+
40
+ def _utc_now() -> str:
41
+ return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
42
+
43
+
44
+ def _normalize_writer(raw: "str | dict") -> dict:
45
+ """Normalise a writer entry to a dict with a populated 'file' key.
46
+
47
+ Handles:
48
+ - dict passed directly (already parsed)
49
+ - JSON-serialised string (from AuthorityDomain.writers_detected storage)
50
+ - plain string (treated as a file path)
51
+
52
+ Field resolution order for the file path:
53
+ 1. ``file`` -- canonical key after Wave A1 fix
54
+ 2. ``location`` -- alternate key used by some authority_builder versions
55
+ 3. ``target`` -- fallback for older entries
56
+ """
57
+ if isinstance(raw, dict):
58
+ obj: dict = raw
59
+ else:
60
+ try:
61
+ parsed = json.loads(raw)
62
+ obj = parsed if isinstance(parsed, dict) else {}
63
+ except (json.JSONDecodeError, TypeError):
64
+ # Treat the raw value itself as a file path.
65
+ return {"file": str(raw), "kind": "unknown"}
66
+
67
+ # Resolve file path from the first non-empty key.
68
+ file_path = (
69
+ obj.get("file", "")
70
+ or obj.get("location", "")
71
+ or obj.get("target", "")
72
+ )
73
+ if file_path and "file" not in obj:
74
+ # Return a normalised copy so downstream code always uses "file".
75
+ obj = dict(obj, file=str(file_path))
76
+ return obj
77
+
78
+
79
+ def _group_files_by_scc(
80
+ entries: dict[str, "StructuralEntry"],
81
+ ) -> list[frozenset[str]]:
82
+ """Group files into strongly connected components via union-find.
83
+
84
+ Args:
85
+ entries: dict mapping file path to StructuralEntry.
86
+
87
+ Returns:
88
+ List of frozensets, each representing one SCC (set of files in cycle).
89
+ Files not in any cycle are omitted.
90
+ """
91
+ # Collect all files mentioned in any cycle.
92
+ cycle_files: set[str] = set()
93
+ for entry in entries.values():
94
+ if entry.cycles:
95
+ cycle_files.add(entry.file)
96
+ cycle_files.update(entry.cycles)
97
+
98
+ parent: dict[str, str] = {f: f for f in cycle_files}
99
+
100
+ def find(f: str) -> str:
101
+ if parent[f] != f:
102
+ parent[f] = find(parent[f])
103
+ return parent[f]
104
+
105
+ def union(a: str, b: str) -> None:
106
+ if a not in parent:
107
+ parent[a] = a
108
+ if b not in parent:
109
+ parent[b] = b
110
+ pa, pb = find(a), find(b)
111
+ if pa != pb:
112
+ parent[pa] = pb
113
+
114
+ # Union files that are in the same cycle.
115
+ for f, entry in entries.items():
116
+ if not entry.cycles:
117
+ continue
118
+ for cycle_member in entry.cycles:
119
+ union(f, cycle_member)
120
+
121
+ # Group by root.
122
+ groups: dict[str, set[str]] = {}
123
+ for f in parent:
124
+ root = find(f)
125
+ groups.setdefault(root, set()).add(f)
126
+
127
+ return [frozenset(group) for group in groups.values()]
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Pairwise checks
132
+ # ---------------------------------------------------------------------------
133
+
134
def _check_authority_vs_runtime(
    authority_map: tuple,
    runtime_map: tuple,
) -> list[tuple]:
    """Authority vs runtime: illegal writers detected in authority -> conflict.

    Every writer of kind ``"illegal_write"`` on an AuthorityDomain produces
    one conflict.  When some runtime node is defined in the same file, a
    second source tagged ``runtime_map`` is attached; note that this only
    checks that a runtime node exists in that file.

    Args:
        authority_map: Iterable of authority entries; items that are not
            AuthorityDomain instances are ignored.
        runtime_map: Iterable of runtime nodes; only ``defined_in`` is read.

    Returns:
        Raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action).
    """
    # Files that host at least one runtime node.  Nodes without a usable
    # ``defined_in`` are skipped so that a writer with an empty path cannot
    # accidentally "match" them and gain bogus runtime evidence.
    runtime_files = {
        defined_in
        for defined_in in (getattr(node, "defined_in", "") for node in runtime_map)
        if defined_in
    }

    conflicts: list[tuple] = []
    for domain in authority_map:
        if not isinstance(domain, AuthorityDomain):
            continue
        for writer_raw in domain.writers_detected:
            writer = _normalize_writer(writer_raw)
            if writer.get("kind", "unknown") != "illegal_write":
                continue
            writer_file = writer.get("file", "")

            sources = [
                {
                    "map": "authority_map",
                    "claim": "illegal_write_detected",
                    "file": writer_file,
                    "domain": domain.authority_domain,
                },
            ]

            # Enrich with runtime evidence if the writer file has a node there.
            if writer_file in runtime_files:
                sources.append({
                    "map": "runtime_map",
                    "claim": "writes_observed",
                    "file": writer_file,
                })

            cid = make_conflict_id(
                domain=domain.authority_domain,
                subject=writer_file,
                sources=sources,
            )
            conflicts.append((
                cid,
                domain.authority_domain,
                writer_file,
                sources,
                "high",
                "investigate_illegal_write",
            ))

    _log.debug("_check_authority_vs_runtime: %d conflicts found", len(conflicts))
    return conflicts
193
+
194
+
195
def _check_authority_vs_structural(
    authority_map: tuple,
    structural_map: tuple,
) -> list[tuple]:
    """Authority vs structural: flag illegal writers that import the owner.

    A conflict is emitted when a writer of kind ``"illegal_write"`` is listed
    in ``imports_in`` of the structural entry for the domain's
    ``canonical_owner``.

    Returns raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action)
    """
    # File path -> StructuralEntry (later duplicates replace earlier ones).
    by_file: dict[str, StructuralEntry] = {
        item.file: item
        for item in structural_map
        if isinstance(item, StructuralEntry)
    }

    found: list[tuple] = []
    for domain in authority_map:
        if not isinstance(domain, AuthorityDomain):
            continue
        owner = domain.canonical_owner

        for raw_writer in domain.writers_detected:
            info = _normalize_writer(raw_writer)
            path = info.get("file", "")
            if info.get("kind", "unknown") != "illegal_write":
                continue

            # The owner's structural entry must exist and list the writer
            # among the files that import it.
            owner_entry = by_file.get(owner)
            if owner_entry is None or path not in owner_entry.imports_in:
                continue

            scoped_domain = domain.authority_domain + ":structural"
            evidence = [
                {
                    "map": "authority_map",
                    "claim": "illegal_write_detected",
                    "file": path,
                    "domain": domain.authority_domain,
                },
                {
                    "map": "structural_map",
                    "claim": "downstream_reader_writes_back",
                    "canonical": owner,
                    "writer": path,
                },
            ]
            conflict_id = make_conflict_id(
                domain=scoped_domain,
                subject=path,
                sources=evidence,
            )
            found.append(
                (conflict_id, scoped_domain, path, evidence, "medium", "review_coupling")
            )

    _log.debug("_check_authority_vs_structural: %d conflicts found", len(found))
    return found
260
+
261
+
262
def _check_contract_vs_structural(
    contract_map: tuple,
    structural_map: tuple,
) -> list[tuple]:
    """Contract vs structural: a variant file does not import the canonical_schema
    -> variant is evolving independently.

    Args:
        contract_map: Iterable of contract entries; items that are not
            DataContractEntry instances, or that have an empty
            ``canonical_schema``, are ignored.
        structural_map: Iterable of structural entries; items that are not
            StructuralEntry instances are ignored.

    Returns:
        Raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action).
    """
    conflicts: list[tuple] = []

    # Index structural entries by file for fast lookup.
    structural_by_file: dict[str, StructuralEntry] = {}
    for entry in structural_map:
        if isinstance(entry, StructuralEntry):
            structural_by_file[entry.file] = entry

    for contract in contract_map:
        if not isinstance(contract, DataContractEntry):
            continue
        canonical_schema = contract.canonical_schema
        if not canonical_schema:
            continue

        for variant_raw in contract.variants:
            # Variants are stored as JSON-serialised dicts or plain strings.
            if isinstance(variant_raw, str):
                try:
                    variant_obj = json.loads(variant_raw)
                except json.JSONDecodeError:
                    # Not JSON: the string itself is the variant's file path.
                    # (Falling back to an empty dict here would silently drop
                    # every plain-string variant.)
                    variant_obj = variant_raw
            else:
                variant_obj = variant_raw

            if isinstance(variant_obj, dict):
                variant_file = variant_obj.get("file", "")
            else:
                variant_file = str(variant_raw)
            if not variant_file:
                continue

            # Check if variant file imports canonical_schema.
            entry = structural_by_file.get(variant_file)
            if entry is None:
                # Variant file not in structural map -- not a conflict here.
                continue

            if canonical_schema in entry.imports_out:
                # Correctly imports canonical -> no conflict.
                continue

            sources = [
                {
                    "map": "contract_map",
                    "claim": "variant_location",
                    "entity": contract.entity,
                    "variant_file": variant_file,
                    "canonical_schema": canonical_schema,
                },
                {
                    "map": "structural_map",
                    "claim": "missing_canonical_import",
                    "file": variant_file,
                },
            ]
            cid = make_conflict_id(
                domain=contract.entity,
                subject=variant_file,
                sources=sources,
            )
            conflicts.append((
                cid,
                contract.entity,
                variant_file,
                sources,
                "medium",
                "add_canonical_import_or_merge",
            ))

    _log.debug("_check_contract_vs_structural: %d conflicts found", len(conflicts))
    return conflicts
335
+
336
+
337
def _check_authority_shared_writes(
    authority_map: tuple,
) -> list[tuple]:
    """Authority shared writes: several writers hitting the same target.

    Only AuthorityDomain items whose ``status`` is ``"inferred"`` are
    examined.  Their ``"shared_write"`` writers are grouped by ``target``;
    any target with two or more recorded writers yields one conflict.

    Returns raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action)
    Severity is "low" when none of the writers has file_role "production",
    otherwise "medium".
    """
    found: list[tuple] = []

    for domain in authority_map:
        if not isinstance(domain, AuthorityDomain) or domain.status != "inferred":
            continue

        # target -> [(writer file, file_role), ...]
        by_target: dict[str, list[tuple[str, str]]] = {}
        for raw_writer in domain.writers_detected:
            info = _normalize_writer(raw_writer)
            if info.get("kind", "unknown") != "shared_write":
                continue
            target = info.get("target", "")
            writer_path = info.get("file", "") or info.get("location", "")
            role = info.get("file_role", "production")
            if not (target and writer_path):
                continue
            by_target.setdefault(target, []).append((writer_path, role))

        for target in sorted(by_target):
            pairs = by_target[target]
            if len(pairs) < 2:
                continue
            # Test-only means no writer carries the "production" role.
            test_only = all(role != "production" for _, role in pairs)
            sources = [
                {
                    "map": "authority_map",
                    "claim": "shared_write_detected",
                    "file": writer_path,
                    "domain": domain.authority_domain,
                    "target": target,
                }
                for writer_path in sorted(path for path, _ in pairs)
            ]
            conflict_id = make_conflict_id(
                domain=domain.authority_domain,
                subject=target,
                sources=sources,
            )
            found.append((
                conflict_id,
                domain.authority_domain,
                target,
                sources,
                "low" if test_only else "medium",
                "investigate_shared_write",
            ))

    _log.debug("_check_authority_shared_writes: %d conflicts found", len(found))
    return found
396
+
397
+
398
def _check_structural_cycles(
    structural_map: tuple,
) -> list[tuple]:
    """Structural cycles: emit one conflict per cycle cluster.

    Files are clustered by ``_group_files_by_scc`` so an N-file cycle yields
    a single finding rather than N of them.

    A cluster is reported only when both hold:
    1. It contains at least 2 files.
    2. Among its files that have a StructuralEntry, the largest
       ``len(imports_in)`` is at least 3.

    Severity (computed over cluster files that have a StructuralEntry):
        "high"   - every such file is classified "production".
        "medium" - some, but not all, are "production".
        "low"    - none is "production".

    Returns raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action)
    """
    # File path -> StructuralEntry, for files that actually have an entry.
    by_file: dict[str, StructuralEntry] = {
        item.file: item
        for item in structural_map
        if isinstance(item, StructuralEntry)
    }

    clusters = _group_files_by_scc(by_file)
    found: list[tuple] = []

    for cluster in clusters:
        size = len(cluster)
        if size < 2:
            continue

        # Cluster members may be referenced by a cycle without having an
        # entry of their own; only members with an entry carry metadata.
        members = [by_file[path] for path in cluster if path in by_file]
        if not members:
            continue

        peak_fan_in = max(len(member.imports_in) for member in members)
        if peak_fan_in < 3:
            continue

        role_of = {member.file: classify_file_role(member.file) for member in members}
        distinct_roles = set(role_of.values())
        if distinct_roles == {"production"}:
            severity = "high"
        elif "production" in distinct_roles:
            severity = "medium"
        else:
            severity = "low"

        # Sorted order keeps both the subject and the sources list stable.
        ordered = sorted(cluster)
        subject = ordered[0]

        sources: list[dict] = []
        for path in ordered:
            item: dict = {
                "map": "structural_map",
                "claim": "import_cycle",
                "file": path,
                "cluster_size": size,
                "cluster_max_fan_in": peak_fan_in,
            }
            known = by_file.get(path)
            if known is not None:
                item["file_role"] = role_of[path]
                item["fan_in"] = len(known.imports_in)
                item["cycle_members"] = list(known.cycles)
            sources.append(item)

        conflict_id = make_conflict_id(
            domain="structural_cycles", subject=subject, sources=sources
        )
        found.append(
            (conflict_id, "structural_cycles", subject, sources, severity, "break_cycle")
        )

    _log.debug(
        "_check_structural_cycles: %d SCC clusters -> %d conflicts",
        len(clusters), len(found),
    )
    return found
489
+
490
+
491
def _check_contract_drift(
    contract_map: tuple,
) -> list[tuple]:
    """Data contract drift: one conflict per contract that carries drift flags.

    Items that are not DataContractEntry instances, or whose ``drift_flags``
    attribute is missing or empty, are skipped.  Each flag becomes one source.

    Returns raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action)
    """
    found: list[tuple] = []

    for contract in contract_map:
        if not isinstance(contract, DataContractEntry):
            continue
        flags = getattr(contract, "drift_flags", ())
        if not flags:
            continue

        sources = [
            {
                "map": "data_contract_map",
                "claim": flag,
                "entity": contract.entity,
            }
            for flag in flags
        ]
        conflict_id = make_conflict_id(
            domain="contract_drift", subject=contract.entity, sources=sources
        )
        found.append((
            conflict_id,
            "contract_drift",
            contract.entity,
            sources,
            "medium",
            "investigate_contract_drift",
        ))

    _log.debug("_check_contract_drift: %d conflicts found", len(found))
    return found
518
+
519
+
520
def _check_runtime_env_coupling(
    runtime_map: tuple,
) -> list[tuple]:
    """Runtime environment coupling: nodes that depend on environment variables.

    Only RuntimeNode items with a non-empty ``depends_on_env`` are considered,
    and nodes whose defining file is not classified "production" are skipped
    entirely.

    Returns raw tuples for ConflictEntry construction:
        (cid, domain, subject, sources, severity, action)
    """
    from .map_models import RuntimeNode

    found: list[tuple] = []

    for node in runtime_map:
        if not isinstance(node, RuntimeNode):
            continue
        env_vars = getattr(node, "depends_on_env", ())
        if not env_vars:
            continue

        # Role is derived from the file the node is defined in.
        if hasattr(node, "defined_in"):
            role = classify_file_role(node.defined_in)
        else:
            role = "production"
        if role != "production":
            # Test/fixture env coupling is not reported.
            continue

        sources = [{
            "map": "runtime_map",
            "claim": "env_var_dependency",
            "node": node.node,
            "defined_in": node.defined_in,
            "file_role": role,
            "env_vars": list(env_vars),
        }]
        conflict_id = make_conflict_id(
            domain="runtime_env_coupling", subject=node.node, sources=sources
        )
        found.append((
            conflict_id,
            "runtime_env_coupling",
            node.node,
            sources,
            "low",
            "document_env_contract",
        ))

    _log.debug("_check_runtime_env_coupling: %d conflicts found", len(found))
    return found
556
+
557
+
558
+ # ---------------------------------------------------------------------------
559
+ # Lifecycle resolution
560
+ # ---------------------------------------------------------------------------
561
+
562
def _populate_conflict_evidence(
    domain: str,
    subject: str,
    sources: list[dict],
) -> tuple[str, ...]:
    """Build serialized evidence items from a conflict's sources.

    Evidence is assembled in three steps:
    1. One ``source_location`` per distinct ``file`` among the first five
       sources.
    2. A ``target_path`` pointing at ``subject`` when the domain name
       contains "write".
    3. For the ``structural_cycles`` domain, up to three further
       ``source_location`` items taken from the sources' ``cycle_members``.

    Returns:
        Tuple of JSON-serialized EvidenceItem strings (at most 10).
    """
    from .map_models_findings import EvidenceItem

    items: list[EvidenceItem] = []
    seen_files = set()

    # Step 1: representative source files (first five sources only).
    for src in sources[:5]:
        if not isinstance(src, dict):
            continue
        path = src.get("file", "")
        if not path or path in seen_files:
            continue
        # NOTE(review): any claim without "cycle" is labelled "authority",
        # including sources that come from other maps -- confirm intended.
        map_name = "structural" if "cycle" in src.get("claim", "") else "authority"
        items.append(EvidenceItem(
            kind="source_location",
            file=path,
            line=src.get("line"),
            map=map_name,
        ))
        seen_files.add(path)

    # Step 2: write-related domains also record the subject as a target path.
    if "write" in domain:
        items.append(EvidenceItem(
            kind="target_path",
            path=subject,
        ))

    # Step 3: extra cycle members not already covered by step 1.
    if domain == "structural_cycles":
        extra_members = set()
        for src in sources:
            if not isinstance(src, dict):
                continue
            members = src.get("cycle_members", [])
            if isinstance(members, list):
                extra_members.update(m for m in members if m not in seen_files)
        for member in sorted(extra_members)[:3]:
            items.append(EvidenceItem(
                kind="source_location",
                file=member,
                map="structural",
            ))
            seen_files.add(member)

    # Serialize, capping the total at 10 items.
    return tuple(json.dumps(item.to_dict(), sort_keys=True) for item in items[:10])
626
+
627
+
628
def _apply_lifecycle(
    raw_conflicts: list[tuple],
    previous_by_id: dict[str, ConflictEntry],
    freshness: str,
) -> tuple[list[ConflictEntry], set[str]]:
    """Turn raw conflict tuples into ConflictEntry objects.

    A conflict whose ID matches a previous entry with conflict_status
    "resolved" or "validated" inherits that status together with the previous
    metadata ``status``; every other conflict starts as "open".

    Returns:
        (entries, seen_ids) where seen_ids is the set of conflict IDs
        present in the new build.
    """
    built: list[ConflictEntry] = []
    ids_in_build: set[str] = set()

    for cid, domain, subject, sources, severity, action in raw_conflicts:
        ids_in_build.add(cid)
        prior = previous_by_id.get(cid)
        prior_state = prior.conflict_status if prior is not None else None

        if prior_state == "resolved":
            conflict_status = "resolved"
        elif prior_state == "validated":
            conflict_status = "validated"
        else:
            conflict_status = "open"

        # Inherited states keep the previous metadata status as well.
        metadata_status = "open" if conflict_status == "open" else prior.status

        built.append(ConflictEntry(
            conflict_id=cid,
            domain=domain,
            subject=subject,
            sources=tuple(json.dumps(src, sort_keys=True) for src in sources),
            severity=severity,
            conflict_status=conflict_status,
            action=action,
            source=_SOURCE,
            evidence=_populate_conflict_evidence(domain, subject, sources),
            confidence=_CONFIDENCE,
            freshness=freshness,
            status=metadata_status,
        ))

    return built, ids_in_build
678
+
679
+
680
def _carry_resolved(
    previous_by_id: dict[str, ConflictEntry],
    seen_ids: set[str],
    freshness: str,
) -> list[ConflictEntry]:
    """Return previous conflicts absent from the new build, marked resolved.

    Each carried entry copies every field from the previous entry except
    ``conflict_status`` (forced to "resolved") and ``freshness`` (set to the
    current build's value).  This holds whether or not the previous entry was
    already resolved.
    """
    return [
        ConflictEntry(
            conflict_id=prev.conflict_id,
            domain=prev.domain,
            subject=prev.subject,
            sources=prev.sources,
            severity=prev.severity,
            conflict_status="resolved",
            action=prev.action,
            source=prev.source,
            evidence=prev.evidence,
            confidence=prev.confidence,
            freshness=freshness,
            status=prev.status,
        )
        for cid, prev in previous_by_id.items()
        if cid not in seen_ids
    ]
723
+
724
+
725
+ # ---------------------------------------------------------------------------
726
+ # Public API
727
+ # ---------------------------------------------------------------------------
728
+
729
def build_conflict_map(
    repo_maps: RepoMaps,
    previous_conflicts: Sequence[ConflictEntry] = (),
) -> list[ConflictEntry]:
    """Build a conflict map from pairwise inter-map diffs.

    Checks performed (in this order):
    1. authority vs runtime     -- illegal writers, enriched when a runtime
                                   node is defined in the writer's file.
    2. authority vs structural  -- illegal writer appears in imports_in of
                                   the canonical owner.
    3. authority shared writes  -- several writers for one target in an
                                   inferred domain.
    4. contract vs structural   -- variant file does not import
                                   canonical_schema.
    5. structural cycles        -- one conflict per import-cycle cluster.
    6. contract drift           -- contracts carrying drift flags.
    7. runtime env coupling     -- production nodes depending on env vars.

    Conflict lifecycle:
    - New conflict (not in previous): status = "open".
    - Conflict matching previous by ID where previous was "resolved" or
      "validated": that status is preserved.
    - Conflict in previous but absent from new build: marked "resolved",
      included in output.

    Args:
        repo_maps: Container with all available maps.
        previous_conflicts: Sequence of ConflictEntry from prior build for
            lifecycle/status preservation.

    Returns:
        Sorted list of ConflictEntry (by conflict_id).

    Raises:
        MapBuildConflictBudgetExceeded: If the open conflict count exceeds
            ``_CONFLICT_BUDGET``.
    """
    _log.info(
        "build_conflict_map: starting pairwise diff -- structural=%d runtime=%d "
        "contract=%d authority=%d previous=%d",
        len(repo_maps.structural),
        len(repo_maps.runtime),
        len(repo_maps.data_contract),
        len(repo_maps.authority),
        len(previous_conflicts),
    )

    freshness = _utc_now()

    # Index previous conflicts by ID for O(1) lookups.
    previous_by_id: dict[str, ConflictEntry] = {
        c.conflict_id: c for c in previous_conflicts
    }

    # Collect raw conflict tuples from all checks.
    raw: list[tuple] = []
    raw.extend(_check_authority_vs_runtime(repo_maps.authority, repo_maps.runtime))
    raw.extend(_check_authority_vs_structural(repo_maps.authority, repo_maps.structural))
    raw.extend(_check_authority_shared_writes(repo_maps.authority))
    raw.extend(_check_contract_vs_structural(repo_maps.data_contract, repo_maps.structural))
    raw.extend(_check_structural_cycles(repo_maps.structural))
    raw.extend(_check_contract_drift(repo_maps.data_contract))
    raw.extend(_check_runtime_env_coupling(repo_maps.runtime))

    # Deduplicate by conflict_id (keep first occurrence, preserve order).
    first_by_id: dict[str, tuple] = {}
    for item in raw:
        first_by_id.setdefault(item[0], item)
    deduped = list(first_by_id.values())

    # Apply lifecycle.
    new_entries, seen_ids = _apply_lifecycle(deduped, previous_by_id, freshness)

    # Carry resolved conflicts that have disappeared.
    carried = _carry_resolved(previous_by_id, seen_ids, freshness)

    result = new_entries + carried

    # Sort for deterministic output.
    result.sort(key=lambda c: c.conflict_id)

    # Count each status explicitly: "validated" entries are neither open nor
    # resolved, so deriving "resolved" as total - open would misreport them.
    open_count = sum(1 for c in result if c.conflict_status == "open")
    resolved_count = sum(1 for c in result if c.conflict_status == "resolved")
    validated_count = sum(1 for c in result if c.conflict_status == "validated")
    _log.info(
        "build_conflict_map: total=%d open=%d resolved=%d validated=%d",
        len(result), open_count, resolved_count, validated_count,
    )

    # Budget check.
    if open_count > _CONFLICT_BUDGET:
        raise MapBuildConflictBudgetExceeded(
            "Open conflict count %d exceeds budget %d" % (open_count, _CONFLICT_BUDGET)
        )

    return result