vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,130 @@
1
+ """Data models for findings map (Map 8).
2
+
3
+ Finding -- synthesized diagnosis of architecture/conflict issues across all maps.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import sys
8
+ import logging
9
+ from dataclasses import dataclass
10
+ from typing import Any
11
+
12
+ __all__ = [
13
+ "EvidenceItem",
14
+ "Finding",
15
+ ]
16
+
17
_log = logging.getLogger(__name__)

# Keyword arguments shared by every @dataclass in this module.
# ``frozen`` is always on; ``slots`` and ``kw_only`` are dataclass options that
# only exist on Python 3.10+, so they are enabled only when available.
_DATACLASS_KWARGS: dict[str, Any] = (
    {"frozen": True, "slots": True, "kw_only": True}
    if sys.version_info >= (3, 10)
    else {"frozen": True}
)
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Map 8: Finding Entry
27
+ # ---------------------------------------------------------------------------
28
+
29
@dataclass(**_DATACLASS_KWARGS)
class EvidenceItem:
    """Evidence pointing to a source of the finding.

    ``kind`` is one of "source_location" | "map_entry" | "target_path" and
    indicates which of the optional fields below are meaningful.
    """

    kind: str
    file: str = ""            # for source_location
    line: int | None = None   # for source_location
    map: str = ""             # for map_entry or source_location
    entry_id: str = ""        # for map_entry
    path: str = ""            # for target_path

    def to_dict(self) -> dict:
        """Return a plain dict; unset optional fields are left out.

        ``kind`` is always present. ``line`` is kept whenever it is not None
        (so 0 is preserved); the string fields are kept only when non-empty.
        """
        payload: dict = {"kind": self.kind}
        for field_name in ("file", "line", "map", "entry_id", "path"):
            value = getattr(self, field_name)
            if field_name == "line":
                present = value is not None
            else:
                present = bool(value)
            if present:
                payload[field_name] = value
        return payload

    @classmethod
    def from_dict(cls, d: dict) -> "EvidenceItem":
        """Build an EvidenceItem from a dict, defaulting missing keys.

        Missing ``kind`` becomes "unknown"; missing string fields become "";
        ``line`` is passed through unchanged (None when absent).
        """
        text_fields = {
            field_name: str(d.get(field_name, ""))
            for field_name in ("file", "map", "entry_id", "path")
        }
        return cls(
            kind=str(d.get("kind", "unknown")),
            line=d.get("line"),
            **text_fields,
        )
63
+
64
+
65
@dataclass(**_DATACLASS_KWARGS)
class Finding:
    """One finding in the findings map (Map 8).

    ``evidence`` holds one string per evidence item. Dict items are stored as
    JSON text (see ``from_dict``); any other item is stored via ``str()`` and
    is therefore not guaranteed to be valid JSON.
    """

    finding_id: str                  # stable hash of finding
    category: str                    # architecture_cycle | state_ownership_conflict |
                                     # schema_drift_risk | runtime_config_risk |
                                     # write_authority_violation
    title: str
    severity: str                    # critical | high | medium | low
    confidence: float
    why_it_matters: str
    suggested_fix: str
    affected_files: tuple[str, ...]
    evidence: tuple[str, ...]        # JSON-serialised EvidenceItem strings
    source_maps: tuple[str, ...]     # which maps contributed to this finding
    finding_status: str              # new | existing | worsened | resolved | accepted
    # Metadata
    source: str
    freshness: str
    status: str

    def to_dict(self) -> dict:
        """Return a JSON-ready dict of this finding.

        Tuples become lists. Each evidence string is decoded back from JSON
        when possible; a string that is not valid JSON is emitted unchanged
        instead of raising, so that ``to_dict(from_dict(x))`` works for
        plain-text evidence too.
        """
        import json as _json

        def _decode_evidence(item):
            if not isinstance(item, str):
                return item
            try:
                return _json.loads(item)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError. from_dict keeps
                # non-dict evidence as str(item), which need not be JSON.
                return item

        return {
            "finding_id": self.finding_id,
            "category": self.category,
            "title": self.title,
            "severity": self.severity,
            "confidence": self.confidence,
            "why_it_matters": self.why_it_matters,
            "suggested_fix": self.suggested_fix,
            "affected_files": list(self.affected_files),
            "evidence": [_decode_evidence(e) for e in self.evidence],
            "source_maps": list(self.source_maps),
            "finding_status": self.finding_status,
            "source": self.source,
            "freshness": self.freshness,
            "status": self.status,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Finding":
        """Build a Finding from a dict, applying defaults for missing keys.

        Defaults: severity "medium", confidence 0.5, finding_status "new",
        source "synthesis", status "validated"; other strings default to ""
        and sequences to empty. Dict evidence items are serialised with
        sorted keys; any other item is converted with ``str()``.

        Raises:
            TypeError / ValueError: if a value cannot be converted (for
                example a non-numeric ``confidence`` or a non-iterable
                ``evidence``).
        """
        import json as _json

        def _serialize_evidence(items: list) -> tuple[str, ...]:
            return tuple(
                _json.dumps(item, sort_keys=True) if isinstance(item, dict) else str(item)
                for item in items
            )

        return cls(
            finding_id=str(d.get("finding_id", "")),
            category=str(d.get("category", "")),
            title=str(d.get("title", "")),
            severity=str(d.get("severity", "medium")),
            confidence=float(d.get("confidence", 0.5)),
            why_it_matters=str(d.get("why_it_matters", "")),
            suggested_fix=str(d.get("suggested_fix", "")),
            affected_files=tuple(d.get("affected_files", [])),
            evidence=_serialize_evidence(d.get("evidence", [])),
            source_maps=tuple(d.get("source_maps", [])),
            finding_status=str(d.get("finding_status", "new")),
            source=str(d.get("source", "synthesis")),
            freshness=str(d.get("freshness", "")),
            status=str(d.get("status", "validated")),
        )
@@ -0,0 +1,455 @@
1
+ """Atomic read/write, filelock-wrapped map storage for the map builder subsystem.
2
+
3
+ Generic design: all output goes to <project_dir>/.cortex/maps/ by default.
4
+ This works for any target project (user project, or Vigil self-diag via
5
+ `--project ./`).
6
+
7
+ Atomic write pattern (tempfile.mkstemp + os.replace):
8
+ fd, tmp_path = tempfile.mkstemp(dir=..., prefix=..., suffix=".tmp")
9
+ try:
10
+ with os.fdopen(fd, "w", encoding="utf-8") as fh:
11
+ json.dump(payload, fh, ...) ; fh.write("\\n")
12
+ os.replace(tmp_path, target)
13
+ except BaseException:
14
+ try: os.unlink(tmp_path)
15
+ except OSError: pass
16
+ raise
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ import os
23
+ import tempfile
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+ from typing import Any, Optional
27
+
28
+ from .map_common import MAPS_SUBDIR, SEEDS_SUBDIR
29
+ from .map_errors import MapConcurrencyError, MapSecurityError
30
+ from .map_models import (
31
+ AuthorityDomain,
32
+ BuildMeta,
33
+ DataContractEntry,
34
+ RepoMaps,
35
+ RuntimeNode,
36
+ StructuralEntry,
37
+ )
38
+ from .map_models_ext import ConflictEntry, HotspotEntry, RefactorBoundary
39
+ from .map_models_findings import Finding
40
+
41
+ __all__ = [
42
+ "BuildMeta",
43
+ "load_repo_maps",
44
+ "write_map",
45
+ "regenerate_index",
46
+ "maps_dir",
47
+ "seeds_dir",
48
+ ]
49
+
50
_log = logging.getLogger(__name__)

# Map name -> filename, relative to maps_dir(project_dir).
# Insertion order is kept exactly as declared (note that "findings" precedes
# "refactor_boundary" even though its numeric prefix is larger).
_MAP_FILES: dict[str, str] = dict(
    (
        ("structural", "10_structural_map.json"),
        ("runtime", "20_runtime_map.json"),
        ("data_contract", "30_data_contract_map.json"),
        ("authority", "40_authority_map.json"),
        ("conflict", "50_conflict_map.json"),
        ("hotspot", "60_hotspot_map.json"),
        ("findings", "80_findings_map.json"),
        ("refactor_boundary", "70_refactor_boundaries.json"),
    )
)

# Filename of the generated index, stored next to the map files.
_INDEX_FILE = "00_map_index.json"
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Path helpers (public — exposed in __all__ and __init__.py)
69
+ # ---------------------------------------------------------------------------
70
+
71
def maps_dir(project_dir: Path) -> Path:
    """Default output location: <project_dir>/.cortex/maps/

    Computed as MAPS_SUBDIR joined onto the resolved project directory.
    """
    resolved_root = project_dir.resolve()
    return resolved_root.joinpath(MAPS_SUBDIR)
74
+
75
+
76
def seeds_dir(project_dir: Path) -> Path:
    """Default seed config location: <project_dir>/.cortex/map_seeds/

    Computed as SEEDS_SUBDIR joined onto the resolved project directory.
    """
    resolved_root = project_dir.resolve()
    return resolved_root.joinpath(SEEDS_SUBDIR)
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # Internal helpers
83
+ # ---------------------------------------------------------------------------
84
+
85
def _check_path_security(project_dir: Path, target: Path) -> None:
    """Ensure ``target`` lies inside ``project_dir``.

    Both paths are resolved non-strictly (they need not exist) before the
    containment test, so ``..`` segments and symlinks are normalised first.

    Raises:
        MapSecurityError: if the resolved target is not under the resolved
            project directory. The message quotes the paths as passed in.
    """
    candidate = target.resolve(strict=False)
    root = project_dir.resolve(strict=False)
    try:
        candidate.relative_to(root)
    except ValueError:
        message = "Path escape attempt: %s is not inside %s" % (target, project_dir)
        raise MapSecurityError(message)
95
+
96
+
97
def _atomic_write_json(path: Path, payload: dict) -> None:
    """Write ``payload`` as JSON to ``path`` via a temp file and os.replace.

    The parent directory is created if needed. The JSON is written to a
    ``.map_*.tmp`` file in that same directory (UTF-8, 2-space indent, sorted
    keys, non-ASCII kept as-is, trailing newline) and then moved over
    ``path`` with os.replace. If anything fails, the temp file is removed
    and the original exception is re-raised.
    """
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
    handle, scratch_path = tempfile.mkstemp(
        suffix=".tmp",
        prefix=".map_",
        dir=str(directory),
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            json.dump(payload, stream, indent=2, ensure_ascii=False, sort_keys=True)
            stream.write("\n")
        os.replace(scratch_path, str(path))
    except BaseException:
        # Best-effort cleanup; BaseException so KeyboardInterrupt also cleans up.
        try:
            os.unlink(scratch_path)
        except OSError as exc:
            _log.error("map_storage: cleanup failed for %s: %s", scratch_path, exc)
        raise
    _log.debug("_atomic_write_json: wrote %s", path)
120
+
121
+
122
def _read_json(path: Path) -> Optional[dict]:
    """Read a JSON object from ``path``.

    The file is read directly rather than probed with ``exists()`` first, so
    a file that disappears between a check and the read is still reported
    as absent instead of surfacing as an unexpected FileNotFoundError.

    Returns:
        dict -- file exists and parses cleanly.
        None -- file does not exist (legitimate absent state).

    Raises:
        OSError / UnicodeDecodeError -- I/O failure reading an existing file.
        ValueError                   -- file exists but contains invalid JSON or
                                        a non-dict top-level value (corrupt map).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: a parent component is a regular file, which
        # Path.exists() also reports as "does not exist".
        _log.debug("_read_json: file not found: %s", path)
        return None
    except (OSError, UnicodeDecodeError) as exc:
        _log.error("_read_json: I/O error reading %s: %s", path, exc)
        raise
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        _log.error("_read_json: corrupt JSON in %s: %s", path, exc)
        raise ValueError("%s is corrupt: %s" % (path, exc)) from exc
    if not isinstance(data, dict):
        msg = "_read_json: expected dict, got %s in %s" % (type(data).__name__, path)
        _log.error(msg)
        raise ValueError(msg)
    return data
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Public API
156
+ # ---------------------------------------------------------------------------
157
+
158
def load_repo_maps(project_dir: Path) -> RepoMaps:
    """Load all 8 maps from <project_dir>/.cortex/maps/.

    Returns RepoMaps(missing=True) if the maps directory is absent.

    Per-file behaviour:
      * An absent map file yields an empty tuple for that map.
      * For every map except findings, an unreadable or corrupt file
        propagates the OSError / ValueError raised by ``_read_json``.
      * The findings map is loaded best-effort: read or parse failures are
        logged as warnings and produce an empty findings tuple.
      * Individual entries that cannot be converted are skipped with a
        warning; this includes entries that are not dicts.
      * An ``entries`` value that is not a list is treated as empty.
    """
    mdir = maps_dir(project_dir)
    if not mdir.is_dir():
        _log.info("load_repo_maps: maps directory absent at %s -- returning missing=True", mdir)
        return RepoMaps(missing=True)

    # AttributeError covers entries that are not dicts: the from_dict
    # implementations visible in this package call ``.get`` on the entry.
    entry_errors = (AttributeError, KeyError, TypeError, ValueError)

    def _load_entries(name: str, from_dict_fn):
        filename = _MAP_FILES[name]
        path = mdir / filename
        payload = _read_json(path)  # raises on corrupt/unreadable files
        if payload is None:
            _log.warning("load_repo_maps: map file absent or corrupt: %s", filename)
            return ()
        entries_raw = payload.get("entries", [])
        if not isinstance(entries_raw, list):
            _log.warning(
                "load_repo_maps: 'entries' is not a list in %s -- treating as empty",
                filename,
            )
            return ()
        result = []
        for raw in entries_raw:
            try:
                result.append(from_dict_fn(raw))
            except entry_errors as exc:
                _log.warning("load_repo_maps: skipping corrupt entry in %s: %s", filename, exc)
        return tuple(result)

    structural = _load_entries("structural", StructuralEntry.from_dict)
    runtime = _load_entries("runtime", RuntimeNode.from_dict)
    data_contract = _load_entries("data_contract", DataContractEntry.from_dict)
    authority = _load_entries("authority", AuthorityDomain.from_dict)
    conflict = _load_entries("conflict", ConflictEntry.from_dict)
    hotspot = _load_entries("hotspot", HotspotEntry.from_dict)
    refactor_boundary = _load_entries("refactor_boundary", RefactorBoundary.from_dict)

    # Load findings (Map 8) -- best-effort: failures are logged, never raised.
    findings_list: list[Finding] = []
    findings_path = mdir / _MAP_FILES["findings"]
    try:
        # _read_json returns None when absent and rejects non-dict payloads
        # with ValueError, so ``.get`` below is always called on a dict.
        findings_payload = _read_json(findings_path)
    except (OSError, ValueError) as exc:
        _log.warning("load_repo_maps: failed to load findings from %s: %s", findings_path, exc)
        findings_payload = None
    if findings_payload is not None:
        raw_findings = findings_payload.get("entries", [])
        if not isinstance(raw_findings, list):
            _log.warning(
                "load_repo_maps: failed to load findings from %s: %s",
                findings_path, "'entries' is not a list",
            )
            raw_findings = []
        for raw_entry in raw_findings:
            try:
                findings_list.append(Finding.from_dict(raw_entry))
            except entry_errors as exc:
                _log.warning("load_repo_maps: skipping corrupt finding entry: %s", exc)

    _log.info(
        "load_repo_maps: loaded structural=%d runtime=%d data_contract=%d "
        "authority=%d conflict=%d hotspot=%d boundary=%d findings=%d",
        len(structural), len(runtime), len(data_contract),
        len(authority), len(conflict), len(hotspot), len(refactor_boundary),
        len(findings_list),
    )
    return RepoMaps(
        structural=structural,
        runtime=runtime,
        data_contract=data_contract,
        authority=authority,
        conflict=conflict,
        hotspot=hotspot,
        refactor_boundary=refactor_boundary,
        findings=tuple(findings_list),
        missing=False,
    )
224
+
225
+
226
def write_map(
    project_dir: Path,
    name: str,
    entries: list,
    metadata: dict,
    *,
    build_meta: BuildMeta | None = None,
    maps_dir_override: Path | None = None,
) -> None:
    """Atomically write a named map with filelock protection.

    Output path: <maps_dir>/<filename>.json (maps_dir defaults to
                 <project_dir>/.cortex/maps/ or maps_dir_override if given).
    Lock path:   <maps_dir>/.<name>.lock

    Args:
        project_dir: Absolute path to the target project root. Used for the
            default maps directory and security checks when no override given.
        name: Map name key (e.g. "structural"). Must be in _MAP_FILES.
        entries: List of entry dicts to write under the "entries" key.
        metadata: Additional top-level keys merged into the payload
            (e.g. schema_version, produced_by, trace_status). Keys given here
            override the defaults "schema_version" and "produced_by";
            "entries" (and "build_meta", when supplied) are set afterwards
            and cannot be overridden.
        build_meta: Optional BuildMeta instance. When provided, its
            ``to_dict()`` is stored under a top-level "build_meta" key.
            The payload's own schema_version is NOT changed by this; it
            stays "1.0.0" unless ``metadata`` sets it.
        maps_dir_override: If given, writes to this directory instead of
            <project_dir>/.cortex/maps/. The directory is created if absent.
            Security check is performed against this directory's own resolved
            path (not project_dir) so temp dirs are allowed.

    Raises:
        ImportError: If the ``filelock`` package is not installed.
        MapConcurrencyError: If filelock timeout exceeded.
        MapSecurityError: If computed path escapes the target dir.
        KeyError: If name is not a known map name.
    """
    try:
        from filelock import FileLock, Timeout as FileLockTimeout
    except ImportError as exc:
        raise ImportError("filelock is required: %s" % exc) from exc

    if name not in _MAP_FILES:
        raise KeyError("Unknown map name: %s. Known names: %s" % (name, list(_MAP_FILES)))

    if maps_dir_override is not None:
        mdir = maps_dir_override.resolve()
    else:
        mdir = maps_dir(project_dir)
    target_path = mdir / _MAP_FILES[name]
    lock_path = mdir / (".%s.lock" % name)

    # Security checks happen before the directory is created or the lock taken.
    if maps_dir_override is not None:
        # Target must stay inside the override dir (uses the module-level
        # MapSecurityError import; no function-local re-import needed).
        try:
            target_path.relative_to(mdir)
        except ValueError:
            raise MapSecurityError(
                "Path escape attempt: %s is not inside override dir %s" % (target_path, mdir)
            )
    else:
        _check_path_security(project_dir, target_path)
        _check_path_security(project_dir, lock_path)

    mdir.mkdir(parents=True, exist_ok=True)

    payload: dict[str, Any] = {
        "schema_version": "1.0.0",
        "produced_by": "vigil_mapper.v1",
    }
    payload.update(metadata)
    payload["entries"] = entries

    # Embed build_meta if provided.  The payload schema_version stays at
    # "1.0.0" for backward-compat with validators (e.g. refactor_boundary_builder
    # enforces major <= 1).  The PRESENCE of "build_meta" is the v2.0.0 signal;
    # regenerate_index reads it to populate the index and emits schema_version
    # "2.0.0" in the per-map index entry when build_meta is present.
    if build_meta is not None:
        payload["build_meta"] = build_meta.to_dict()

    _log.debug("write_map: acquiring lock for %s", name)
    try:
        with FileLock(str(lock_path), timeout=10):
            _log.debug("write_map: lock acquired for %s", name)
            _atomic_write_json(target_path, payload)
    except FileLockTimeout as exc:
        _log.error("write_map: filelock timeout for map %s: %s", name, exc)
        raise MapConcurrencyError(
            "Filelock timeout (10s) for map %s -- another writer may be running" % name
        ) from exc

    _log.info("write_map: wrote %s (%d entries)", name, len(entries))
318
+
319
+
320
def regenerate_index(
    project_dir: Path,
    *,
    maps_dir_override: Path | None = None,
) -> None:
    """Rebuild 00_map_index.json from existing map files on disk.

    Structure per plan sec.9 Observability Contract.
    Output: <maps_dir>/00_map_index.json (default: <project_dir>/.cortex/maps/).

    A missing map file counts as a warning. A map file that cannot be read
    or parsed counts as an error: it is recorded in the index with
    ``status: "error"``, the remaining maps are still indexed, the index is
    written with ``pipeline_success: false``, and only then is the first
    such error re-raised. Callers therefore still see the same exception as
    before, but the on-disk index reflects the failure instead of staying
    stale.

    Args:
        project_dir: Absolute path to the target project root.
        maps_dir_override: If given, reads maps from and writes index to this
            directory instead of <project_dir>/.cortex/maps/.

    Raises:
        OSError / ValueError: re-raised from ``_read_json`` (after the index
            has been written) when a map file is unreadable or corrupt.
        MapSecurityError: if the index path escapes ``project_dir`` (only
            checked when no override directory is given).
    """
    if maps_dir_override is not None:
        mdir = maps_dir_override.resolve()
    else:
        mdir = maps_dir(project_dir)
    mdir.mkdir(parents=True, exist_ok=True)

    from .fingerprint import map_schema_hash

    built_at = (
        datetime.now(timezone.utc)
        .isoformat()
        .replace("+00:00", "Z")
    )

    maps_section: dict[str, Any] = {}
    total_entries = 0
    warnings_count = 0
    errors_count = 0
    # First read/parse failure; re-raised once the index has been written.
    first_error: Optional[Exception] = None
    all_schema_versions: list[str] = []
    # Accumulate language -> {map_name: bool} across all maps for the matrix.
    # Key: language string. Value: dict mapping each map name to True/False.
    _lang_support: dict[str, dict[str, bool]] = {}

    for name, filename in _MAP_FILES.items():
        path = mdir / filename
        try:
            payload = _read_json(path)
        except (OSError, ValueError) as exc:
            # _read_json has already logged the details at error level.
            errors_count += 1
            if first_error is None:
                first_error = exc
            maps_section[name] = {"status": "error", "error": str(exc)}
            continue
        if payload is None:
            warnings_count += 1
            maps_section[name] = {"status": "missing"}
            continue

        entries = payload.get("entries", [])
        entry_count = len(entries)
        total_entries += entry_count
        schema_ver = payload.get("schema_version", "0.0.0")
        all_schema_versions.append(schema_ver)

        schema_hash_val = map_schema_hash(entries) if entries else ""
        file_bytes = path.stat().st_size if path.exists() else 0

        # Derive per-map index entry from build_meta if present (v2.0.0),
        # otherwise emit a legacy sentinel (v1.0.0 payloads without build_meta).
        raw_build_meta = payload.get("build_meta")
        if raw_build_meta is not None:
            bm = BuildMeta.from_dict(raw_build_meta)
            supported_langs: list[str] = bm.coverage.get("supported_languages", [])

            # Accumulate into the cross-map language support matrix.
            all_langs = set(bm.coverage.get("files_scanned_by_lang", {}).keys()) | set(supported_langs)
            for lang in all_langs:
                _lang_support.setdefault(lang, {})[name] = lang in supported_langs

            map_entry: dict[str, Any] = {
                "analysis_mode": bm.analysis_mode,
                # build_duration_s and built_at are in semantic_diff._IGNORED_FIELDS,
                # so they are stripped during I2 determinism checks.
                "build_duration_s": bm.duration_s,
                "built_at": bm.built_at,
                "confidence_avg": bm.confidence_avg,
                "coverage_ratio": bm.coverage.get("coverage_ratio", 0.0),
                "entry_count": entry_count,
                # file_bytes is in _IGNORED_FIELDS (size jitters with duration precision).
                "file_bytes": file_bytes,
                "languages_detected": bm.coverage.get("files_scanned_by_lang", {}),
                "producer": bm.producer,
                "reason": bm.reason,
                "schema_hash": schema_hash_val,
                "schema_version": "2.0.0",  # build_meta presence signals v2
                "supported_languages": supported_langs,
                "status": bm.status,
            }
            # 5.3: surface unsupported_files_sample when the build_meta contains it.
            if bm.unsupported_files_sample:
                map_entry["unsupported_files_sample"] = list(bm.unsupported_files_sample)
            maps_section[name] = map_entry
        else:
            # Legacy v1.0.0 payload -- no build_meta present.
            maps_section[name] = {
                "entry_count": entry_count,
                "file_bytes": file_bytes,
                "schema_hash": schema_hash_val,
                "schema_version": "1.0.0",
                "status": "legacy",
            }

    schema_versions_all_equal = len(set(all_schema_versions)) <= 1

    # 5.1: Build per-language support matrix across every map in _MAP_FILES.
    # For each language seen in any map, record True/False per map name.
    # Maps absent from a language's accumulator are filled with False.
    all_map_names = list(_MAP_FILES)
    language_support: dict[str, dict[str, bool]] = {
        lang: {mn: _lang_support[lang].get(mn, False) for mn in all_map_names}
        for lang in sorted(_lang_support)
    }

    index_payload: dict[str, Any] = {
        "schema_version": "1.0.0",
        "produced_by": "vigil_mapper.v1",
        "built_at": built_at,
        "pipeline_success": errors_count == 0,
        "maps": maps_section,
        "global": {
            "total_entries": total_entries,
            "schema_versions_all_equal": schema_versions_all_equal,
            "warnings_count": warnings_count,
            "errors_count": errors_count,
            "language_support": language_support,
        },
    }

    index_path = mdir / _INDEX_FILE
    if maps_dir_override is None:
        _check_path_security(project_dir, index_path)
    _atomic_write_json(index_path, index_payload)
    _log.info(
        "regenerate_index: total_entries=%d warnings=%d errors=%d",
        total_entries, warnings_count, errors_count,
    )

    if first_error is not None:
        # Preserve the original contract: a corrupt/unreadable map still
        # surfaces to the caller, now after the index records the failure.
        raise first_error