vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,731 @@
1
+ """CLI entry point for the map builder subsystem.
2
+
3
+ cmd_map_build(args) is called from the Vigil app dispatcher.
4
+ Returns an integer exit code (never calls sys.exit directly).
5
+
6
+ Exit codes:
7
+ 0 -- success, pipeline_success=true
8
+ 1 -- application error (exception in builder or subprocess)
9
+ 2 -- validation error (bad args, path issues)
10
+ 3 -- strict mode: warnings detected
11
+ 4 -- strict mode: new conflicts detected
12
+ 124 -- (reserved) timeout; tracer timeout uses --timeout-s
13
+
14
+ Design note: conflict and hotspot builders depend on prior maps (structural,
15
+ runtime, authority, data_contract). When building all maps in sequence the
16
+ pipeline keeps an in-memory accumulator (RepoMaps) so conflict/hotspot do not
17
+ need to re-read from disk. This also makes --dry-run work correctly: no files
18
+ are written, but downstream builders still receive the freshly built entries.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import json
24
+ import logging
25
+ import re
26
+ import sys
27
+ import time
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ _log = logging.getLogger(__name__)
32
+
33
# Map names in dependency order:
# - conflict+hotspot depend on structural/runtime/authority/data_contract
# - findings depends on all 7 maps (includes hotspot)
# - refactor_boundary may depend on findings (auto-inferred boundaries use SCC + hotspot)
# cmd_map_build copies this list verbatim when --map is "all", so the order
# written here is the order in which the maps get built.
_ALL_MAPS_ORDERED = [
    "structural",
    "data_contract",
    "authority",
    "runtime",
    "conflict",
    "hotspot",
    "findings",
    "refactor_boundary",
]

# Matches ESC "[" <digits and semicolons> "m" sequences, i.e. the terminal
# colour/style codes that _strip_ansi removes for --no-color output.
_ANSI_RE = re.compile(r"\033\[[0-9;]*m")
49
+
50
+
51
def _strip_ansi(text: str) -> str:
    """Return *text* with every match of ``_ANSI_RE`` deleted."""
    cleaned = _ANSI_RE.sub("", text)
    return cleaned
53
+
54
+
55
+ def _print_line(msg: str, no_color: bool = False, file=None) -> None:
56
+ """Print to stdout (or file). Strips ANSI if --no-color."""
57
+ if no_color:
58
+ msg = _strip_ansi(msg)
59
+ if file is not None:
60
+ print(msg, file=file)
61
+ else:
62
+ print(msg)
63
+
64
+
65
+ def _build_metadata(map_name: str, duration_s: float) -> dict[str, Any]:
66
+ """Build standard metadata dict for write_map."""
67
+ from datetime import datetime, timezone
68
+ built_at = (
69
+ datetime.now(timezone.utc)
70
+ .isoformat()
71
+ .replace("+00:00", "Z")
72
+ )
73
+ return {
74
+ "built_at": built_at,
75
+ "build_duration_s": round(duration_s, 2),
76
+ "map_name": map_name,
77
+ }
78
+
79
+
80
def _scan_project_files_by_lang(project_dir: Path) -> dict[str, int]:
    """Walk *project_dir* once and count source files per adapter language.

    A file is counted only when its lower-cased suffix is a key of ADAPTERS;
    the language name comes from that adapter's ``language`` attribute.

    cmd_map_build calls this a single time per build and hands the result to
    every map, so all maps of one build report the same file-count snapshot
    (determinism for I2).
    """
    from .map_common import iter_source_files
    from .source_adapters import ADAPTERS

    counts: dict[str, int] = {}
    for source_file in iter_source_files(project_dir):
        suffix = source_file.suffix.lower()
        if suffix not in ADAPTERS:
            continue  # no adapter registered for this extension
        language = ADAPTERS[suffix].language
        try:
            counts[language] += 1
        except KeyError:
            counts[language] = 1
    return counts
96
+
97
+
98
+ def _authority_conflict_build_meta_kwargs(
99
+ map_name: str,
100
+ files_by_lang: dict[str, int],
101
+ ) -> dict:
102
+ """Return keyword overrides for _build_build_meta when coverage is partial.
103
+
104
+ Computes coverage_ratio from files_by_lang. If ratio < 1.0, emits
105
+ analysis_mode / status / reason reflecting honest partial coverage.
106
+
107
+ Returns an empty dict when coverage is 1.0 (pure Python project) so the
108
+ defaults in _build_build_meta remain unchanged.
109
+ """
110
+ py_count = files_by_lang.get("python", 0)
111
+ total = sum(files_by_lang.values())
112
+ if total == 0 or py_count == total:
113
+ return {} # full coverage or empty project -- no override needed
114
+
115
+ non_py = total - py_count
116
+ ratio = py_count / total # coverage_ratio (same formula as _build_build_meta)
117
+
118
+ if map_name == "authority":
119
+ return {
120
+ "analysis_mode": "python_ast+seed_only",
121
+ "status": "partial",
122
+ "reason": (
123
+ f"Authority writer detection covers {py_count} Python file(s) via AST; "
124
+ f"{non_py} non-Python file(s) covered by seed only "
125
+ f"(writer detectors arrive in Phase L7a). "
126
+ f"coverage_ratio={ratio:.2f}"
127
+ ),
128
+ }
129
+ if map_name == "conflict":
130
+ return {
131
+ "analysis_mode": "python_ast+seed_only",
132
+ "status": "partial",
133
+ "reason": (
134
+ f"partial_upstream_limited: authority coverage_ratio={ratio:.2f}; "
135
+ f"{non_py} non-Python file(s) not analysed for write conflicts."
136
+ ),
137
+ }
138
+ return {}
139
+
140
+
141
def _build_build_meta(
    map_name: str,
    duration_s: float,
    project_dir: Path,
    producer_module: str,
    files_by_lang: dict[str, int],
    *,
    analysis_mode: str = "python_ast",
    status: str = "ok",
    reason: str = "",
    confidence_avg: float = 1.0,
) -> "Any":
    """Create the BuildMeta record for one map.

    Args:
        map_name: Map whose coverage metadata is looked up.
        duration_s: Build time in seconds; stored rounded to three decimals.
        project_dir: Accepted for the caller's convenience; not read here.
        producer_module: Stored as the ``producer`` field.
        files_by_lang: Pre-computed {language: count} from
            _scan_project_files_by_lang. It is passed in instead of being
            recomputed per map so that all maps of a single build share one
            file-count snapshot (I2 determinism).
        analysis_mode, status, reason, confidence_avg: Copied into the
            BuildMeta fields of the same name.

    Returns:
        A BuildMeta whose ``coverage`` dict holds the scanned counts, the
        counts restricted to the map's supported languages, their ratio
        (0.0 when nothing was scanned) and the supported language list.
    """
    from datetime import datetime, timezone

    from .map_common import get_coverage_metadata
    from .map_models import BuildMeta

    timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    languages = get_coverage_metadata(map_name).get("supported_languages", [])
    supported_counts: dict[str, int] = {}
    for language in languages:
        supported_counts[language] = files_by_lang.get(language, 0)

    scanned = sum(files_by_lang.values())
    covered = sum(supported_counts.values())
    if scanned > 0:
        ratio = covered / scanned
    else:
        ratio = 0.0

    coverage_block = {
        "files_scanned_by_lang": files_by_lang,
        "files_supported_by_lang": supported_counts,
        "coverage_ratio": ratio,
        "supported_languages": languages,
    }
    return BuildMeta(
        analysis_mode=analysis_mode,
        status=status,
        reason=reason,
        confidence_avg=confidence_avg,
        coverage=coverage_block,
        producer=producer_module,
        built_at=timestamp,
        duration_s=round(duration_s, 3),
    )
196
+
197
+
198
def _make_repo_maps_from_accumulator(acc: dict[str, list]) -> Any:
    """Wrap the in-memory accumulator in a RepoMaps instance.

    *acc* maps a map name to a list of entry objects (dataclass instances,
    not dicts). Only structural, runtime, data_contract, authority, conflict,
    hotspot and refactor_boundary are read; a name that is absent from *acc*
    becomes an empty tuple. The result is always flagged ``missing=False``.
    """
    from .map_models import RepoMaps

    map_fields = (
        "structural",
        "runtime",
        "data_contract",
        "authority",
        "conflict",
        "hotspot",
        "refactor_boundary",
    )
    snapshot = {field: tuple(acc.get(field, ())) for field in map_fields}
    return RepoMaps(**snapshot, missing=False)
215
+
216
+
217
+ def _build_single_map(
218
+ map_name: str,
219
+ project_dir: Path,
220
+ timeout_s: int,
221
+ runtime_target: str | None,
222
+ in_memory_acc: dict[str, list],
223
+ parse_cache: "Any | None" = None,
224
+ maps_dir_override: Path | None = None,
225
+ cancel_event: "Any | None" = None,
226
+ ) -> tuple[list, list, dict, bool]:
227
+ """Build one named map. Returns (entry_objects, entry_dicts, metadata, had_warning).
228
+
229
+ entry_objects -- raw dataclass instances (for in-memory accumulator).
230
+ entry_dicts -- serialised dicts ready for write_map / dry-run logging.
231
+ metadata -- metadata dict for write_map.
232
+ had_warning -- True if the builder emitted a degraded-mode warning.
233
+ parse_cache -- optional ParseCacheL1 instance; passed through to builders
234
+ that support it (structural, runtime, data_contract, authority).
235
+ maps_dir_override -- optional override for maps directory (for --output-dir).
236
+ cancel_event -- optional threading.Event; builder stops early when set.
237
+ """
238
+ t0 = time.perf_counter()
239
+ had_warning = False
240
+
241
+ if map_name == "structural":
242
+ from .structural_builder import build_structural_map
243
+ entries_obj = build_structural_map(
244
+ project_dir, parse_cache=parse_cache, cancel_event=cancel_event
245
+ )
246
+
247
+ elif map_name == "data_contract":
248
+ from .data_contract_builder import build_data_contract_map
249
+ entries_obj = build_data_contract_map(project_dir, parse_cache=parse_cache)
250
+
251
+ elif map_name == "authority":
252
+ from .authority_builder import build_authority_map
253
+ entries_obj = build_authority_map(project_dir, parse_cache=parse_cache)
254
+
255
+ elif map_name == "runtime":
256
+ if runtime_target:
257
+ from .runtime_builder import build_runtime_map_full
258
+ entries_obj, _extra = build_runtime_map_full(
259
+ project_dir,
260
+ target_module=runtime_target,
261
+ timeout_s=float(min(timeout_s, 90)),
262
+ )
263
+ else:
264
+ from .runtime_builder import build_runtime_map_static
265
+ entries_obj = build_runtime_map_static(project_dir, parse_cache=parse_cache)
266
+
267
+ elif map_name == "refactor_boundary":
268
+ from .refactor_boundary_builder import load_refactor_seeds, infer_refactor_boundaries
269
+
270
+ # Step 1: Load manual seeds (user-authored, persistent)
271
+ seed_boundaries = load_refactor_seeds(project_dir)
272
+
273
+ # Step 2: ALWAYS infer auto-boundaries (deterministic from repo maps)
274
+ if in_memory_acc:
275
+ repo_maps = _make_repo_maps_from_accumulator(in_memory_acc)
276
+ auto_boundaries = infer_refactor_boundaries(repo_maps)
277
+ else:
278
+ from .map_storage import load_repo_maps
279
+ repo_maps = load_repo_maps(project_dir)
280
+ auto_boundaries = infer_refactor_boundaries(repo_maps) if not repo_maps.missing else []
281
+
282
+ # Step 3: Merge: seed entries override auto-inferred for same files
283
+ # Seeds have higher priority and appear first
284
+ entries_obj = []
285
+
286
+ # Collect auto-inferred boundary file sets
287
+ auto_file_sets: set[tuple[str, ...]] = set()
288
+ for auto_b in auto_boundaries:
289
+ # Use allowed_files + forbidden_files as the identity
290
+ file_set = tuple(sorted(set(auto_b.allowed_files) | set(auto_b.forbidden_files)))
291
+ auto_file_sets.add(file_set)
292
+
293
+ # Add seed entries first (they have priority)
294
+ entries_obj.extend(seed_boundaries)
295
+
296
+ # Track which file sets are already covered by seeds
297
+ seed_file_sets: set[tuple[str, ...]] = set()
298
+ for seed_b in seed_boundaries:
299
+ file_set = tuple(sorted(set(seed_b.allowed_files) | set(seed_b.forbidden_files)))
300
+ seed_file_sets.add(file_set)
301
+
302
+ # Add auto-inferred entries that don't conflict with seeds
303
+ for auto_b in auto_boundaries:
304
+ file_set = tuple(sorted(set(auto_b.allowed_files) | set(auto_b.forbidden_files)))
305
+ if file_set not in seed_file_sets:
306
+ entries_obj.append(auto_b)
307
+
308
+ elif map_name == "conflict":
309
+ # Prefer in-memory accumulator; fall back to disk if running single-map mode
310
+ if in_memory_acc:
311
+ repo_maps = _make_repo_maps_from_accumulator(in_memory_acc)
312
+ else:
313
+ from .map_storage import load_repo_maps
314
+ repo_maps = load_repo_maps(project_dir)
315
+ if repo_maps.missing:
316
+ _log.warning(
317
+ "conflict builder: no maps found at %s/.cortex/maps/ "
318
+ "-- building with empty inputs",
319
+ project_dir,
320
+ )
321
+ had_warning = True
322
+ from .conflict_builder import build_conflict_map
323
+ entries_obj = build_conflict_map(repo_maps)
324
+
325
+ elif map_name == "hotspot":
326
+ # Prefer in-memory accumulator; fall back to disk if running single-map mode
327
+ if in_memory_acc:
328
+ repo_maps = _make_repo_maps_from_accumulator(in_memory_acc)
329
+ else:
330
+ from .map_storage import load_repo_maps
331
+ repo_maps = load_repo_maps(project_dir)
332
+ if repo_maps.missing:
333
+ _log.warning(
334
+ "hotspot builder: no maps found at %s/.cortex/maps/ "
335
+ "-- building with empty inputs",
336
+ project_dir,
337
+ )
338
+ had_warning = True
339
+ from .hotspot_builder import build_hotspot_map, compute_hotspot_churn_metadata
340
+ _churn_data, _churn_meta = compute_hotspot_churn_metadata(project_dir)
341
+ entries_obj = build_hotspot_map(repo_maps, churn_data=_churn_data)
342
+
343
+ elif map_name == "findings":
344
+ # Prefer in-memory accumulator; fall back to disk if running single-map mode
345
+ if in_memory_acc:
346
+ repo_maps = _make_repo_maps_from_accumulator(in_memory_acc)
347
+ else:
348
+ from .map_storage import load_repo_maps
349
+ repo_maps = load_repo_maps(project_dir)
350
+ if repo_maps.missing:
351
+ _log.warning(
352
+ "findings builder: no maps found at %s/.cortex/maps/ "
353
+ "-- building with empty inputs",
354
+ project_dir,
355
+ )
356
+ had_warning = True
357
+ from .findings_builder import build_findings_map
358
+ entries_obj = build_findings_map(project_dir, repo_maps, maps_dir_override=maps_dir_override)
359
+
360
+ else:
361
+ raise ValueError("Unknown map name: %s" % map_name)
362
+
363
+ duration = time.perf_counter() - t0
364
+ metadata = _build_metadata(map_name, duration)
365
+ # For hotspot: embed churn audit fields so cmd_map_build can write them to index.
366
+ if map_name == "hotspot":
367
+ metadata.update(_churn_meta) # type: ignore[possibly-undefined]
368
+ entry_dicts = [e.to_dict() for e in entries_obj]
369
+ return entries_obj, entry_dicts, metadata, had_warning
370
+
371
+
372
+ def _maybe_materialize_remote_tree(project_dir: Path) -> Path:
373
+ """Remote tree materialization not available in standalone mode."""
374
+ return project_dir
375
+
376
+
377
def cmd_map_build(args: argparse.Namespace) -> int:
    """Entry point called from dispatch. Returns exit code (never sys.exit).

    Attributes read from *args* (all via getattr, so each is optional except
    ``project``): project, verbose, map, dry_run, no_color, strict,
    timeout_s, output_dir, runtime_target, json, max_file_mb, cancel_event.

    Returns:
        0 -- build finished.
        1 -- a builder raised, or the index could not be regenerated / read.
        2 -- validation error: missing or invalid --project, or an unknown
             --map value.
        3 -- strict mode: at least one builder reported a warning.
        4 -- strict mode: at least one conflict entry has
             lifecycle_status == "new" (takes precedence over 3).

    Note: when ``cancel_event`` is set the build loop stops before the next
    map, but the steps after the loop (cache flush, index regeneration,
    strict checks) still run.
    """

    # 1. Validate --project
    project_str = getattr(args, "project", None)
    if not project_str:
        _log.error("--project is required")
        print("[ERR] --project is required", file=sys.stderr)
        return 2

    project_dir = Path(project_str).resolve()
    if not project_dir.exists():
        _log.error("--project path does not exist: %s", project_dir)
        print("[ERR] --project path does not exist: %s" % project_dir, file=sys.stderr)
        return 2
    if not project_dir.is_dir():
        _log.error("--project path is not a directory: %s", project_dir)
        print("[ERR] --project path is not a directory: %s" % project_dir, file=sys.stderr)
        return 2

    # X2.1: when transport_mode=remote_authoritative, materialize a
    # tarball of the SERVER tree into a local cache and run builders
    # against that. Local copy is a thin client and would yield stale
    # maps. ``sync_fallback`` and missing-contract paths use the local
    # copy as before (it IS the truth in those modes).
    real_project_dir = project_dir  # save original before possible materialize
    project_dir = _maybe_materialize_remote_tree(project_dir)

    # 2. Configure logging
    verbose = bool(getattr(args, "verbose", False))
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)
        _log.debug("Verbose mode enabled")

    # 3. Determine map set. An unknown name is a bad argument (exit 2); it is
    # rejected here, before any scanning or cache setup, instead of
    # surfacing later as a builder failure (exit 1).
    map_arg = getattr(args, "map", "all")
    if map_arg == "all":
        map_set = list(_ALL_MAPS_ORDERED)
    elif map_arg in _ALL_MAPS_ORDERED:
        map_set = [map_arg]
    else:
        _log.error("unknown --map value: %s", map_arg)
        print("[ERR] unknown --map value: %s" % (map_arg,), file=sys.stderr)
        return 2

    dry_run = bool(getattr(args, "dry_run", False))
    no_color = bool(getattr(args, "no_color", False))
    strict = bool(getattr(args, "strict", False))
    timeout_s = int(getattr(args, "timeout_s", 300))
    output_dir = getattr(args, "output_dir", None)
    runtime_target = getattr(args, "runtime_target", None)
    json_mode = bool(getattr(args, "json", False))
    max_file_mb = float(getattr(args, "max_file_mb", 5.0))
    cancel_event = getattr(args, "cancel_event", None)

    target_maps_dir: Path | None = None
    if output_dir:
        target_maps_dir = Path(output_dir).resolve()
        target_maps_dir.mkdir(parents=True, exist_ok=True)
        _log.info("map-build: using output_dir override: %s", target_maps_dir)

    # Phase 2: If materialization happened and user didn't specify --output-dir,
    # force maps to real project dir (not _remote_cache), so supervisor reads
    # materialized maps from the canonical location, not stale local copies.
    if project_dir != real_project_dir and target_maps_dir is None:
        target_maps_dir = real_project_dir / ".cortex" / "maps"
        target_maps_dir.mkdir(parents=True, exist_ok=True)
        _log.info(
            "map-build: remote_authoritative — forcing maps output to %s",
            target_maps_dir,
        )

    _log.info(
        "map-build start: project=%s map=%s dry_run=%s strict=%s",
        project_dir, map_arg, dry_run, strict,
    )

    # 4. Build each map — keep in-memory accumulator for conflict/hotspot
    from .map_storage import write_map, regenerate_index

    total = len(map_set)
    total_warnings = 0
    new_conflicts = 0
    # When --json is active, progress lines go to stderr so stdout stays clean JSON
    _progress_file = sys.stderr if json_mode else None

    # in_memory_acc holds dataclass instances for downstream builders
    in_memory_acc: dict[str, list] = {}
    # churn metadata captured during hotspot build for index audit (E2)
    _hotspot_churn_meta: dict | None = None

    # Pre-compute file scan once per build (shared by all _build_build_meta calls)
    # so that all maps in this build share a consistent file-count snapshot.
    # This ensures I2 (build determinism) passes: the index coverage fields
    # are identical between two back-to-back builds of the same project.
    _files_by_lang: dict[str, int] = _scan_project_files_by_lang(project_dir)

    # Initialise two-level parse cache. L2 uses real_project_dir so the on-disk
    # cache survives across builds even in remote_authoritative mode where
    # project_dir may be a _remote_cache subdirectory.
    from .parse_cache import ParseCacheL1, ParseCacheL2  # noqa: PLC0415
    _parse_l2 = ParseCacheL2(real_project_dir)
    _parse_l1 = ParseCacheL1(_parse_l2, max_file_mb=max_file_mb)
    _log.debug(
        "map-build: parse cache initialised (L2 at %s, max_file_mb=%.1f)",
        real_project_dir, max_file_mb,
    )

    # For each map, the map after whose build its accumulator entry may be
    # dropped. Every entry names refactor_boundary (the last map in the
    # pipeline), so the whole accumulator is released once refactor_boundary
    # has been built; building any other map frees nothing.
    _LAST_CONSUMERS: dict[str, str] = {
        "structural": "refactor_boundary",
        "data_contract": "refactor_boundary",
        "authority": "refactor_boundary",
        "runtime": "refactor_boundary",
        "conflict": "refactor_boundary",
        "hotspot": "refactor_boundary",
        "findings": "refactor_boundary",
        "refactor_boundary": "refactor_boundary",  # drop immediately
    }

    for idx, map_name in enumerate(map_set, start=1):
        # Honour cancel before starting each map
        if cancel_event is not None and cancel_event.is_set():
            _log.info("map-build: cancelled before building %s", map_name)
            break

        _log.info("[%d/%d] Building map: %s", idx, total, map_name)
        try:
            entries_obj, entry_dicts, metadata, had_warning = _build_single_map(
                map_name,
                project_dir,
                timeout_s,
                runtime_target,
                in_memory_acc,
                parse_cache=_parse_l1,
                maps_dir_override=target_maps_dir,
                cancel_event=cancel_event,
            )
        except Exception as exc:  # noqa: BLE001
            # Top-level boundary: any builder failure maps to exit code 1.
            _log.error("[%d/%d] %s: FAILED -- %s", idx, total, map_name, exc)
            # Keep the one-line error above compact; the traceback is only
            # emitted at debug level (e.g. with --verbose).
            _log.debug("map-build: traceback for failed %s build", map_name, exc_info=True)
            # Routed like every other progress line so that stdout stays
            # clean when --json is active.
            _print_line(
                "[%d/%d] %s: ERROR -- %s" % (idx, total, map_name, exc),
                no_color,
                file=_progress_file,
            )
            return 1

        # Accumulate in memory for downstream builders
        in_memory_acc[map_name] = entries_obj

        # Capture churn metadata from hotspot build for index audit (E2)
        if map_name == "hotspot":
            _hotspot_churn_meta = {
                k: metadata.get(k)
                for k in ("churn_source", "git_head_sha", "since_window")
            }

        if had_warning:
            total_warnings += 1

        # Count new conflicts for --strict
        if map_name == "conflict":
            new_conflicts = sum(
                1 for e in entry_dicts
                if isinstance(e, dict) and e.get("lifecycle_status") == "new"
            )

        duration_s = metadata.get("build_duration_s", 0.0)

        if dry_run:
            _log.info(
                "[%d/%d] %s: would write (%d entries) [dry-run]",
                idx, total, map_name, len(entry_dicts),
            )
            _print_line(
                "[%d/%d] %s: would write (%d entries) [dry-run]"
                % (idx, total, map_name, len(entry_dicts)),
                no_color,
                file=_progress_file,
            )
        else:
            _producer = "vigil_mapper.%s_builder" % map_name
            _extra_kwargs = _authority_conflict_build_meta_kwargs(map_name, _files_by_lang)
            _bm = _build_build_meta(
                map_name,
                duration_s,
                project_dir,
                _producer,
                _files_by_lang,
                **_extra_kwargs,
            )
            write_map(
                project_dir,
                map_name,
                entry_dicts,
                metadata,
                build_meta=_bm,
                maps_dir_override=target_maps_dir,
            )
            # Log coverage metadata for this builder
            from .map_common import get_coverage_metadata  # noqa: PLC0415
            coverage = get_coverage_metadata(map_name)
            _log.debug(
                "[%d/%d] %s coverage: supported_languages=%s",
                idx, total, map_name, coverage["supported_languages"],
            )
            _print_line(
                "[%d/%d] %s: %d entries, %.1fs, %s"
                % (idx, total, map_name, len(entry_dicts), duration_s, _bm.status),
                no_color,
                file=_progress_file,
            )

        # Release in-memory entries for maps whose last downstream consumer
        # is the map we just built. This bounds peak RSS during the pipeline.
        # entry_dicts (serialised dicts) are retained on the stack until
        # write_map returns; entries_obj (dataclass instances) can be dropped
        # once no future builder will call _make_repo_maps_from_accumulator.
        maps_to_free = [
            k for k, last in _LAST_CONSUMERS.items()
            if last == map_name and k in in_memory_acc
        ]
        for _k in maps_to_free:
            del in_memory_acc[_k]
            _log.debug("map-build: freed in_memory_acc[%s] after %s", _k, map_name)

    # 4b. Log parse cache stats (debug summary)
    _parse_l1.log_stats()
    _parse_l2.flush()

    # 5. Regenerate index
    if not dry_run:
        try:
            regenerate_index(project_dir, maps_dir_override=target_maps_dir)
            _log.info("map-build: index regenerated")
        except Exception as exc:  # noqa: BLE001
            _log.error("map-build: failed to regenerate index: %s", exc)
            return 1

        # 5a. Patch churn audit fields into maps.hotspot section of index (E2).
        # Kept under "not dry_run" so a dry run never writes to the index.
        # Best effort: a failure here is logged and does not fail the build.
        if _hotspot_churn_meta is not None:
            from .map_storage import maps_dir as _maps_dir
            _index_dir = target_maps_dir if target_maps_dir is not None else _maps_dir(project_dir)
            _index_path = _index_dir / "00_map_index.json"
            try:
                _index_payload = json.loads(_index_path.read_text(encoding="utf-8"))
                _hotspot_section = _index_payload.get("maps", {}).get("hotspot", {})
                _hotspot_section.update(_hotspot_churn_meta)
                _index_payload.setdefault("maps", {})["hotspot"] = _hotspot_section
                _index_path.write_text(
                    json.dumps(_index_payload, indent=2, ensure_ascii=False),
                    encoding="utf-8",
                )
                _log.info(
                    "map-build: hotspot churn metadata patched into index "
                    "(churn_source=%s)",
                    _hotspot_churn_meta.get("churn_source"),
                )
            except (OSError, json.JSONDecodeError) as exc:
                _log.warning("map-build: failed to patch churn metadata into index: %s", exc)

    # 6. --json output: print 00_map_index.json to stdout
    if json_mode and not dry_run:
        from .map_storage import maps_dir
        _json_maps_dir = target_maps_dir if target_maps_dir is not None else maps_dir(project_dir)
        index_path = _json_maps_dir / "00_map_index.json"
        if index_path.exists():
            try:
                payload = json.loads(index_path.read_text(encoding="utf-8"))
                text = json.dumps(payload, indent=2, ensure_ascii=False)
                _print_line(text, no_color)
            except (json.JSONDecodeError, OSError) as exc:
                _log.error("map-build: failed to read index: %s", exc)
                return 1
        else:
            _log.warning("map-build: index not found after build")
    elif json_mode and dry_run:
        _log.info("map-build: --json with --dry-run: no index written, skipping JSON output")

    # 7. --strict exit code logic (new conflicts outrank plain warnings)
    if strict:
        if new_conflicts > 0:
            _log.warning("map-build: strict mode -- %d new conflicts detected", new_conflicts)
            return 4
        if total_warnings > 0:
            _log.warning("map-build: strict mode -- %d warnings", total_warnings)
            return 3

    # 8. Publish maps to server (remote_authoritative optional feature — not available in standalone)
    # _maybe_materialize_remote_tree is a no-op in standalone, so project_dir == real_project_dir
    # always. This block is preserved as a stub so the original logic is readable.
    _log.debug("map-build: standalone mode — remote publish skipped")

    _log.info("map-build complete: exit 0")
    return 0
667
+
668
+
669
+ # ---------------------------------------------------------------------------
670
+ # Programmatic API (E0 — A6)
671
+ # ---------------------------------------------------------------------------
672
+
673
def run_map_build(
    project_dir: Path,
    *,
    map: str = "all",
    dry_run: bool = False,
    strict: bool = False,
    timeout_s: int = 300,
    output_dir: Path | None = None,
    max_file_mb: float = 5.0,
    cancel_event: "Any | None" = None,
) -> int:
    """Run the map build pipeline from Python code instead of the CLI.

    This is a thin adapter: it packs its arguments into an
    ``argparse.Namespace`` shaped like the parsed command line and hands it to
    ``cmd_map_build``, whose integer exit code is returned unchanged.

    Only functional options are exposed.  The CLI presentation flags are
    pinned to fixed values (``verbose=False``, ``json=False``,
    ``no_color=True``) and ``runtime_target`` is always ``None``.

    Args:
        project_dir: Root of the target project (absolute or resolvable).
            Forwarded as a string.
        map: Name of the single map to build, or ``"all"`` for the whole
            pipeline.
        dry_run: When True, maps are built in memory and nothing is written
            to disk.
        strict: When True, warnings yield exit code 3 and new conflicts
            yield exit code 4.
        timeout_s: Tracer timeout in seconds (runtime map only).
        output_dir: Alternative directory to write maps into, replacing
            ``<project_dir>/.cortex/maps/``; created if it does not exist.
            Intended for background workers that build into a temp
            directory before applying a semantic diff filter (see E1).
            Forwarded as a string, or ``None`` when not given.
        max_file_mb: Size threshold in MiB; larger files are skipped during
            the build and reported in the result meta under
            ``oversized_files``.  Defaults to 5.0; ``float('inf')`` turns
            the guard off.
        cancel_event: Optional ``threading.Event``.  Once set, the build
            stops after the map in progress completes and returns exit
            code 0 with partial results.

    Returns:
        Integer exit code with the same meaning as ``cmd_map_build``:
            0 -- success
            1 -- application error
            2 -- validation error (bad args or path)
            3 -- strict mode: warnings
            4 -- strict mode: new conflicts
    """
    from argparse import Namespace

    # Paths are handed over as plain strings; an absent output_dir stays None.
    target_dir = None if output_dir is None else str(output_dir)

    # Mirror the attribute set of the parsed CLI arguments, in the same order.
    cli_like_args = {
        "project": str(project_dir),
        "map": map,
        "dry_run": dry_run,
        "strict": strict,
        "timeout_s": timeout_s,
        "output_dir": target_dir,
        # Presentation flags are fixed for programmatic callers.
        "verbose": False,
        "json": False,
        "no_color": True,
        "runtime_target": None,
        "max_file_mb": max_file_mb,
        "cancel_event": cancel_event,
    }
    return cmd_map_build(Namespace(**cli_like_args))