vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Data models for findings map (Map 8).
|
|
2
|
+
|
|
3
|
+
Finding -- synthesized diagnosis of architecture/conflict issues across all maps.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import logging
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"EvidenceItem",
|
|
14
|
+
"Finding",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
_log = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Keyword arguments applied to every ``@dataclass`` declared in this module.
# ``frozen`` is always set; ``slots`` and ``kw_only`` are added only when the
# running interpreter reports version 3.10 or newer.
_DATACLASS_KWARGS: dict[str, Any] = {"frozen": True}
if sys.version_info >= (3, 10):
    _DATACLASS_KWARGS.update(slots=True, kw_only=True)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Map 8: Finding Entry
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
@dataclass(**_DATACLASS_KWARGS)
class EvidenceItem:
    """A single piece of evidence backing a finding.

    ``kind`` names the evidence flavour; the comment on each optional field
    lists the kinds it is meant for.
    """

    kind: str  # "source_location" | "map_entry" | "target_path"
    file: str = ""  # for source_location
    line: int | None = None  # for source_location
    map: str = ""  # for map_entry or source_location
    entry_id: str = ""  # for map_entry
    path: str = ""  # for target_path

    def to_dict(self) -> dict:
        """Return ``kind`` plus every optional field that carries a value.

        Empty strings are omitted; ``line`` is omitted only when it is
        ``None`` (so a line number of 0 is kept).
        """
        candidates = (
            ("file", self.file, bool(self.file)),
            ("line", self.line, self.line is not None),
            ("map", self.map, bool(self.map)),
            ("entry_id", self.entry_id, bool(self.entry_id)),
            ("path", self.path, bool(self.path)),
        )
        payload: dict = {"kind": self.kind}
        payload.update(
            {key: value for key, value, present in candidates if present}
        )
        return payload

    @classmethod
    def from_dict(cls, d: dict) -> "EvidenceItem":
        """Build an instance from a mapping, defaulting any missing key.

        String fields are passed through ``str()``; ``line`` is taken as-is
        (``None`` when absent). A missing ``kind`` becomes ``"unknown"``.
        """

        def _text(key: str, fallback: str = "") -> str:
            return str(d.get(key, fallback))

        return cls(
            kind=_text("kind", "unknown"),
            file=_text("file"),
            line=d.get("line"),
            map=_text("map"),
            entry_id=_text("entry_id"),
            path=_text("path"),
        )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(**_DATACLASS_KWARGS)
class Finding:
    """One finding in the findings map (Map 8).

    ``evidence`` is stored as a tuple of strings: ``from_dict`` serialises
    dict evidence to JSON text (sorted keys) and stores anything else via
    ``str()``; ``to_dict`` decodes JSON text back into objects and leaves
    text that is not valid JSON untouched.
    """

    finding_id: str  # stable hash of finding
    category: str  # architecture_cycle | state_ownership_conflict |
    #                schema_drift_risk | runtime_config_risk |
    #                write_authority_violation
    title: str
    severity: str  # critical | high | medium | low
    confidence: float
    why_it_matters: str
    suggested_fix: str
    affected_files: tuple[str, ...]
    evidence: tuple[str, ...]  # JSON-serialised EvidenceItem strings
    source_maps: tuple[str, ...]  # which maps contributed to this finding
    finding_status: str  # new | existing | worsened | resolved | accepted
    # Metadata
    source: str
    freshness: str
    status: str

    def to_dict(self) -> dict:
        """Return a plain-dict form of the finding.

        Tuples become lists. Each evidence string is decoded with
        ``json.loads``; a string that is not valid JSON is emitted unchanged
        instead of raising, because ``from_dict`` accepts such strings.
        """
        import json as _json

        def _decode_evidence(item):
            if not isinstance(item, str):
                return item
            try:
                return _json.loads(item)
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass. from_dict()
                # stores non-dict evidence via str(item), so plain text must
                # survive a from_dict -> to_dict round trip.
                return item

        return {
            "finding_id": self.finding_id,
            "category": self.category,
            "title": self.title,
            "severity": self.severity,
            "confidence": self.confidence,
            "why_it_matters": self.why_it_matters,
            "suggested_fix": self.suggested_fix,
            "affected_files": list(self.affected_files),
            "evidence": [_decode_evidence(e) for e in self.evidence],
            "source_maps": list(self.source_maps),
            "finding_status": self.finding_status,
            "source": self.source,
            "freshness": self.freshness,
            "status": self.status,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Finding":
        """Build a finding from a mapping, defaulting any missing key.

        Raises:
            TypeError / ValueError: when a present value cannot be coerced
                (for example a non-numeric ``confidence`` or a non-iterable
                ``evidence``).
        """
        import json as _json

        def _serialize_evidence(items: list) -> tuple[str, ...]:
            # Dicts are stored as canonical JSON text; everything else as str().
            return tuple(
                _json.dumps(item, sort_keys=True) if isinstance(item, dict) else str(item)
                for item in items
            )

        return cls(
            finding_id=str(d.get("finding_id", "")),
            category=str(d.get("category", "")),
            title=str(d.get("title", "")),
            severity=str(d.get("severity", "medium")),
            confidence=float(d.get("confidence", 0.5)),
            why_it_matters=str(d.get("why_it_matters", "")),
            suggested_fix=str(d.get("suggested_fix", "")),
            affected_files=tuple(d.get("affected_files", [])),
            evidence=_serialize_evidence(d.get("evidence", [])),
            source_maps=tuple(d.get("source_maps", [])),
            finding_status=str(d.get("finding_status", "new")),
            source=str(d.get("source", "synthesis")),
            freshness=str(d.get("freshness", "")),
            status=str(d.get("status", "validated")),
        )
|
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""Atomic read/write, filelock-wrapped map storage for the map builder subsystem.
|
|
2
|
+
|
|
3
|
+
Generic design: all output goes to <project_dir>/.cortex/maps/ by default.
|
|
4
|
+
This works for any target project (user project, or Vigil self-diag via
|
|
5
|
+
`--project ./`).
|
|
6
|
+
|
|
7
|
+
Atomic write pattern (tempfile.mkstemp + os.replace):
|
|
8
|
+
fd, tmp_path = tempfile.mkstemp(dir=..., prefix=..., suffix=".tmp")
|
|
9
|
+
try:
|
|
10
|
+
with os.fdopen(fd, "w", encoding="utf-8") as fh:
|
|
11
|
+
json.dump(payload, fh, ...) ; fh.write("\\n")
|
|
12
|
+
os.replace(tmp_path, target)
|
|
13
|
+
except BaseException:
|
|
14
|
+
try: os.unlink(tmp_path)
|
|
15
|
+
except OSError: pass
|
|
16
|
+
raise
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
import tempfile
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Optional
|
|
27
|
+
|
|
28
|
+
from .map_common import MAPS_SUBDIR, SEEDS_SUBDIR
|
|
29
|
+
from .map_errors import MapConcurrencyError, MapSecurityError
|
|
30
|
+
from .map_models import (
|
|
31
|
+
AuthorityDomain,
|
|
32
|
+
BuildMeta,
|
|
33
|
+
DataContractEntry,
|
|
34
|
+
RepoMaps,
|
|
35
|
+
RuntimeNode,
|
|
36
|
+
StructuralEntry,
|
|
37
|
+
)
|
|
38
|
+
from .map_models_ext import ConflictEntry, HotspotEntry, RefactorBoundary
|
|
39
|
+
from .map_models_findings import Finding
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
"BuildMeta",
|
|
43
|
+
"load_repo_maps",
|
|
44
|
+
"write_map",
|
|
45
|
+
"regenerate_index",
|
|
46
|
+
"maps_dir",
|
|
47
|
+
"seeds_dir",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
_log = logging.getLogger(__name__)
|
|
51
|
+
|
|
52
|
+
# Map filenames (relative to maps_dir(project_dir))
|
|
53
|
+
# Map name -> filename inside the maps directory. Insertion order is kept
# exactly as declared because callers iterate this mapping.
_MAP_FILES: dict[str, str] = dict(
    (
        ("structural", "10_structural_map.json"),
        ("runtime", "20_runtime_map.json"),
        ("data_contract", "30_data_contract_map.json"),
        ("authority", "40_authority_map.json"),
        ("conflict", "50_conflict_map.json"),
        ("hotspot", "60_hotspot_map.json"),
        ("findings", "80_findings_map.json"),
        ("refactor_boundary", "70_refactor_boundaries.json"),
    )
)

# Filename of the index document written next to the maps.
_INDEX_FILE = "00_map_index.json"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Path helpers (public — exposed in __all__ and __init__.py)
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
def maps_dir(project_dir: Path) -> Path:
    """Return the default maps output directory for *project_dir*.

    The project path is resolved first and ``MAPS_SUBDIR`` is appended to it
    (described in this module's docstring as ``<project_dir>/.cortex/maps/``).
    """
    resolved_root = project_dir.resolve()
    return resolved_root.joinpath(MAPS_SUBDIR)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def seeds_dir(project_dir: Path) -> Path:
    """Return the default seed-config directory for *project_dir*.

    The project path is resolved first and ``SEEDS_SUBDIR`` is appended to it.
    """
    resolved_root = project_dir.resolve()
    return resolved_root.joinpath(SEEDS_SUBDIR)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
# Internal helpers
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
def _check_path_security(project_dir: Path, target: Path) -> None:
    """Ensure *target* resolves to a location underneath *project_dir*.

    Both paths are resolved non-strictly (they need not exist) before the
    containment test, so ``..`` segments and symlinks are taken into account.

    Raises:
        MapSecurityError: if the resolved target is not inside the resolved
            project directory.
    """
    root = project_dir.resolve(strict=False)
    candidate = target.resolve(strict=False)
    try:
        # relative_to() raises ValueError when candidate is not under root.
        candidate.relative_to(root)
    except ValueError:
        message = "Path escape attempt: %s is not inside %s" % (target, project_dir)
        raise MapSecurityError(message)
    return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _atomic_write_json(path: Path, payload: dict) -> None:
    """Serialise *payload* as JSON and move it into place at *path*.

    The document is written to a temporary file created in the destination
    directory and then swapped in with ``os.replace``. If anything fails
    along the way the temporary file is removed and the original exception
    is re-raised.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch_path = tempfile.mkstemp(
        dir=str(destination_dir), prefix=".map_", suffix=".tmp"
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            serialized = json.dumps(
                payload, indent=2, ensure_ascii=False, sort_keys=True
            )
            stream.write(serialized)
            stream.write("\n")
        os.replace(scratch_path, str(path))
    except BaseException:
        # Best-effort removal of the scratch file; the original error wins.
        try:
            os.unlink(scratch_path)
        except OSError as exc:
            _log.error("map_storage: cleanup failed for %s: %s", scratch_path, exc)
        raise
    else:
        _log.debug("_atomic_write_json: wrote %s", path)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _read_json(path: Path) -> Optional[dict]:
    """Read a JSON object from *path*.

    The file is opened directly instead of being probed with ``exists()``
    first, so a file that disappears between the probe and the read can no
    longer turn the legitimate "absent" state into an I/O error.

    Returns:
        dict -- file exists and parses cleanly.
        None -- file does not exist (legitimate absent state).

    Raises:
        OSError / UnicodeDecodeError -- I/O failure reading an existing file.
        ValueError -- file exists but contains invalid JSON or
                      a non-dict top-level value (corrupt map).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Missing file, or a parent component that is not a directory:
        # both are the "absent" state, not an error.
        _log.debug("_read_json: file not found: %s", path)
        return None
    except (OSError, UnicodeDecodeError) as exc:
        _log.error("_read_json: I/O error reading %s: %s", path, exc)
        raise
    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        _log.error("_read_json: corrupt JSON in %s: %s", path, exc)
        raise ValueError("%s is corrupt: %s" % (path, exc)) from exc
    if not isinstance(data, dict):
        msg = "_read_json: expected dict, got %s in %s" % (type(data).__name__, path)
        _log.error(msg)
        raise ValueError(msg)
    return data
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
# Public API
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
def load_repo_maps(project_dir: Path) -> RepoMaps:
    """Load all 8 maps from the maps directory of *project_dir*.

    Returns RepoMaps(missing=True) if the maps directory is absent.

    A map file that is absent yields an empty tuple for that map. For the
    seven non-findings maps, an unreadable or corrupt file propagates the
    exception raised by ``_read_json`` (OSError / ValueError). The findings
    map is loaded leniently: any such failure is logged as a warning and the
    findings tuple is left empty. Individual entries that are not dicts, or
    that ``from_dict`` rejects, are skipped with a warning.
    """
    mdir = maps_dir(project_dir)
    if not mdir.is_dir():
        _log.info("load_repo_maps: maps directory absent at %s -- returning missing=True", mdir)
        return RepoMaps(missing=True)

    def _parse_entries(payload: dict, from_dict_fn, what: str) -> tuple:
        # Convert payload["entries"] into model objects, skipping bad items.
        parsed = []
        for raw in payload.get("entries", []):
            if not isinstance(raw, dict):
                # from_dict expects a mapping; anything else would raise an
                # AttributeError that the handler below does not cover.
                _log.warning(
                    "load_repo_maps: skipping corrupt %s: %s",
                    what, "expected dict, got %s" % type(raw).__name__,
                )
                continue
            try:
                parsed.append(from_dict_fn(raw))
            except (KeyError, TypeError, ValueError) as exc:
                _log.warning("load_repo_maps: skipping corrupt %s: %s", what, exc)
        return tuple(parsed)

    def _load_entries(name: str, from_dict_fn):
        filename = _MAP_FILES[name]
        payload = _read_json(mdir / filename)
        if payload is None:
            # _read_json returns None only for a missing file; corrupt files
            # raise instead, so this branch means "absent".
            _log.warning("load_repo_maps: map file absent: %s", filename)
            return ()
        return _parse_entries(payload, from_dict_fn, "entry in %s" % filename)

    structural = _load_entries("structural", StructuralEntry.from_dict)
    runtime = _load_entries("runtime", RuntimeNode.from_dict)
    data_contract = _load_entries("data_contract", DataContractEntry.from_dict)
    authority = _load_entries("authority", AuthorityDomain.from_dict)
    conflict = _load_entries("conflict", ConflictEntry.from_dict)
    hotspot = _load_entries("hotspot", HotspotEntry.from_dict)
    refactor_boundary = _load_entries("refactor_boundary", RefactorBoundary.from_dict)

    # Load findings (Map 8). Unlike the maps above this is best-effort: going
    # through _read_json means a non-dict top level becomes a ValueError that
    # is handled here, and TypeError covers a non-iterable "entries" value.
    findings: tuple = ()
    findings_path = mdir / _MAP_FILES["findings"]
    try:
        findings_payload = _read_json(findings_path)
        if findings_payload is not None:
            findings = _parse_entries(findings_payload, Finding.from_dict, "finding entry")
    except (OSError, ValueError, KeyError, TypeError) as exc:
        _log.warning("load_repo_maps: failed to load findings from %s: %s", findings_path, exc)

    _log.info(
        "load_repo_maps: loaded structural=%d runtime=%d data_contract=%d "
        "authority=%d conflict=%d hotspot=%d boundary=%d findings=%d",
        len(structural), len(runtime), len(data_contract),
        len(authority), len(conflict), len(hotspot), len(refactor_boundary),
        len(findings),
    )
    return RepoMaps(
        structural=structural,
        runtime=runtime,
        data_contract=data_contract,
        authority=authority,
        conflict=conflict,
        hotspot=hotspot,
        refactor_boundary=refactor_boundary,
        findings=findings,
        missing=False,
    )
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def write_map(
    project_dir: Path,
    name: str,
    entries: list,
    metadata: dict,
    *,
    build_meta: BuildMeta | None = None,
    maps_dir_override: Path | None = None,
) -> None:
    """Atomically write a named map with filelock protection.

    Output path: <maps_dir>/<filename>.json (maps_dir defaults to
    <project_dir>/.cortex/maps/ or maps_dir_override if given).
    Lock path:   <maps_dir>/.<name>.lock

    Args:
        project_dir: Absolute path to the target project root. Used for the
            default maps directory and security checks when no override given.
        name: Map name key (e.g. "structural"). Must be in _MAP_FILES.
        entries: List of entry dicts to write under the "entries" key.
        metadata: Additional top-level keys merged into the payload
            (e.g. schema_version, produced_by, trace_status).
        build_meta: Optional BuildMeta instance. When provided, adds a
            top-level "build_meta" key. The payload's own schema_version is
            not changed by this argument (it stays "1.0.0" unless metadata
            overrides it).
        maps_dir_override: If given, writes to this directory instead of
            <project_dir>/.cortex/maps/. The directory is created if absent.
            Security check is performed against this directory's own resolved
            path (not project_dir) so temp dirs are allowed.

    Raises:
        MapConcurrencyError: If filelock timeout exceeded.
        MapSecurityError: If the map or lock path resolves outside the
            target dir.
        KeyError: If name is not a known map name.
        ImportError: If the filelock package is not installed.
    """
    try:
        from filelock import FileLock, Timeout as FileLockTimeout
    except ImportError as exc:
        raise ImportError("filelock is required: %s" % exc) from exc

    if name not in _MAP_FILES:
        raise KeyError("Unknown map name: %s. Known names: %s" % (name, list(_MAP_FILES)))

    # Pick the directory to write into and the root that paths must stay under.
    if maps_dir_override is not None:
        mdir = maps_dir_override.resolve()
        security_root = mdir
    else:
        mdir = maps_dir(project_dir)
        security_root = project_dir

    target_path = mdir / _MAP_FILES[name]
    lock_path = mdir / (".%s.lock" % name)

    # Security check before creating anything or acquiring the lock. Both
    # branches go through _check_path_security so the paths are resolved
    # (symlinks included) and the lock file is validated as well; a plain
    # relative_to() on the unresolved join could never fail.
    _check_path_security(security_root, target_path)
    _check_path_security(security_root, lock_path)

    mdir.mkdir(parents=True, exist_ok=True)

    payload: dict[str, Any] = {
        "schema_version": "1.0.0",
        "produced_by": "vigil_mapper.v1",
    }
    payload.update(metadata)
    # Assigned after the merge so metadata cannot replace the entries list.
    payload["entries"] = entries

    # Embed build_meta if provided. The payload schema_version stays at
    # "1.0.0" for backward-compat with validators (e.g. refactor_boundary_builder
    # enforces major ≤ 1). The PRESENCE of "build_meta" is the v2.0.0 signal;
    # regenerate_index reads it to populate the index and emits schema_version
    # "2.0.0" in the per-map index entry when build_meta is present.
    if build_meta is not None:
        payload["build_meta"] = build_meta.to_dict()

    _log.debug("write_map: acquiring lock for %s", name)
    try:
        with FileLock(str(lock_path), timeout=10):
            _log.debug("write_map: lock acquired for %s", name)
            _atomic_write_json(target_path, payload)
    except FileLockTimeout as exc:
        _log.error("write_map: filelock timeout for map %s: %s", name, exc)
        raise MapConcurrencyError(
            "Filelock timeout (10s) for map %s -- another writer may be running" % name
        ) from exc

    _log.info("write_map: wrote %s (%d entries)", name, len(entries))
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def regenerate_index(
    project_dir: Path,
    *,
    maps_dir_override: Path | None = None,
) -> None:
    """Rebuild 00_map_index.json from existing map files on disk.

    Structure per plan sec.9 Observability Contract.
    Output: <maps_dir>/00_map_index.json (default: <project_dir>/.cortex/maps/).

    Each known map file is read; a missing file is recorded with status
    "missing" and counted as a warning. An unreadable or corrupt file
    propagates the exception raised by ``_read_json``.

    Args:
        project_dir: Absolute path to the target project root.
        maps_dir_override: If given, reads maps from and writes index to this
            directory instead of <project_dir>/.cortex/maps/.

    Raises:
        MapSecurityError: If the default index path resolves outside
            project_dir (not checked when maps_dir_override is given).
        OSError / ValueError: From ``_read_json`` for unreadable/corrupt maps.
    """
    if maps_dir_override is not None:
        mdir = maps_dir_override.resolve()
    else:
        mdir = maps_dir(project_dir)

    # Validate the destination before creating any directory, the same order
    # write_map uses (security check first, mkdir second).
    index_path = mdir / _INDEX_FILE
    if maps_dir_override is None:
        _check_path_security(project_dir, index_path)
    mdir.mkdir(parents=True, exist_ok=True)

    from .fingerprint import map_schema_hash

    built_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    maps_section: dict[str, Any] = {}
    total_entries = 0
    warnings_count = 0
    # NOTE(review): nothing in this function increments errors_count, so the
    # index always reports errors_count == 0 and pipeline_success == True;
    # corrupt maps surface as exceptions from _read_json instead. Confirm
    # whether failures are meant to be recorded here.
    errors_count = 0
    all_schema_versions: list[str] = []
    # Accumulate language -> {map_name: bool} across all maps for the matrix.
    # Key: language string. Value: dict mapping each map name to True/False.
    _lang_support: dict[str, dict[str, bool]] = {}

    for name, filename in _MAP_FILES.items():
        path = mdir / filename
        payload = _read_json(path)
        if payload is None:
            warnings_count += 1
            maps_section[name] = {"status": "missing"}
            continue

        entries = payload.get("entries", [])
        entry_count = len(entries)
        total_entries += entry_count
        schema_ver = payload.get("schema_version", "0.0.0")
        all_schema_versions.append(schema_ver)

        schema_hash_val = map_schema_hash(entries) if entries else ""
        # stat() directly: the file was just read, but it may have been
        # removed since, in which case the size is reported as 0.
        try:
            file_bytes = path.stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            file_bytes = 0

        # Derive per-map index entry from build_meta if present (v2.0.0),
        # otherwise emit a legacy sentinel (v1.0.0 payloads without build_meta).
        raw_build_meta = payload.get("build_meta")
        if raw_build_meta is not None:
            bm = BuildMeta.from_dict(raw_build_meta)
            supported_langs: list[str] = bm.coverage.get("supported_languages", [])
            files_by_lang = bm.coverage.get("files_scanned_by_lang", {})

            # Accumulate into the cross-map language support matrix.
            for lang in set(files_by_lang) | set(supported_langs):
                _lang_support.setdefault(lang, {})[name] = lang in supported_langs

            map_entry: dict[str, Any] = {
                "analysis_mode": bm.analysis_mode,
                # build_duration_s and built_at are in semantic_diff._IGNORED_FIELDS,
                # so they are stripped during I2 determinism checks.
                "build_duration_s": bm.duration_s,
                "built_at": bm.built_at,
                "confidence_avg": bm.confidence_avg,
                "coverage_ratio": bm.coverage.get("coverage_ratio", 0.0),
                "entry_count": entry_count,
                # file_bytes is in _IGNORED_FIELDS (size jitters with duration precision).
                "file_bytes": file_bytes,
                "languages_detected": files_by_lang,
                "producer": bm.producer,
                "reason": bm.reason,
                "schema_hash": schema_hash_val,
                "schema_version": "2.0.0",  # build_meta presence signals v2
                "supported_languages": supported_langs,
                "status": bm.status,
            }
            # 5.3: surface unsupported_files_sample when the build_meta contains it.
            if bm.unsupported_files_sample:
                map_entry["unsupported_files_sample"] = list(bm.unsupported_files_sample)
            maps_section[name] = map_entry
        else:
            # Legacy v1.0.0 payload -- no build_meta present.
            maps_section[name] = {
                "entry_count": entry_count,
                "file_bytes": file_bytes,
                "schema_hash": schema_hash_val,
                "schema_version": "1.0.0",
                "status": "legacy",
            }

    schema_versions_all_equal = len(set(all_schema_versions)) <= 1

    # 5.1: Build per-language support matrix across every map in _MAP_FILES.
    # For each language seen in any map, record True/False per map name.
    # Maps absent from a language's accumulator are filled with False.
    all_map_names = list(_MAP_FILES)
    language_support: dict[str, dict[str, bool]] = {
        lang: {mn: _lang_support[lang].get(mn, False) for mn in all_map_names}
        for lang in sorted(_lang_support)
    }

    index_payload: dict[str, Any] = {
        "schema_version": "1.0.0",
        "produced_by": "vigil_mapper.v1",
        "built_at": built_at,
        "pipeline_success": errors_count == 0,
        "maps": maps_section,
        "global": {
            "total_entries": total_entries,
            "schema_versions_all_equal": schema_versions_all_equal,
            "warnings_count": warnings_count,
            "errors_count": errors_count,
            "language_support": language_support,
        },
    }

    _atomic_write_json(index_path, index_payload)
    _log.info(
        "regenerate_index: total_entries=%d warnings=%d errors=%d",
        total_entries, warnings_count, errors_count,
    )
|