vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
"""Shared utilities, constants and helpers for the map builder subsystem.
|
|
2
|
+
|
|
3
|
+
Contains: iter_py_files, make_metadata, STRUCTURAL_THRESHOLDS,
|
|
4
|
+
HOTSPOT_WEIGHTS, hotspot_mode_for_score.
|
|
5
|
+
|
|
6
|
+
Generic design: operates on any target project_dir, not on Vigil itself.
|
|
7
|
+
Default layout per-project:
|
|
8
|
+
<project_dir>/.cortex/maps/ -- generated map outputs
|
|
9
|
+
<project_dir>/.cortex/map_seeds/ -- optional seed config
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Iterator, Sequence
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"classify_file_role",
|
|
20
|
+
"iter_py_files",
|
|
21
|
+
"iter_source_files",
|
|
22
|
+
"make_metadata",
|
|
23
|
+
"get_coverage_metadata",
|
|
24
|
+
"get_file_inventory_cache",
|
|
25
|
+
"update_file_inventory_cache",
|
|
26
|
+
"STRUCTURAL_THRESHOLDS",
|
|
27
|
+
"CONTRACT_THRESHOLDS",
|
|
28
|
+
"HOTSPOT_WEIGHTS",
|
|
29
|
+
"hotspot_mode_for_score",
|
|
30
|
+
"MAPS_SUBDIR",
|
|
31
|
+
"SEEDS_SUBDIR",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
_log = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Convention paths (per-project, relative to project_dir)
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
# Default per-project layout (under target project):
|
|
41
|
+
# <project_dir>/.cortex/maps/ -- generated map outputs
|
|
42
|
+
# <project_dir>/.cortex/map_seeds/ -- optional seed config
|
|
43
|
+
# Matches existing .cortex/ convention in Vigil (gate_profile.json etc).
|
|
44
|
+
# Locations below are relative to the target project's root directory.
MAPS_SUBDIR = ".cortex/maps"  # generated map outputs
SEEDS_SUBDIR = ".cortex/map_seeds"  # optional seed config
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Threshold constants
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
# Limits used to attach structural tags to a file.
STRUCTURAL_THRESHOLDS: dict[str, int] = dict(
    large_file_lines=1200,
    high_fan_in=10,
    high_fan_out=15,
)

# Limits applied to data-contract findings.
CONTRACT_THRESHOLDS: dict[str, int] = dict(
    max_drift_flags=10,
    max_variants=20,
)
|
|
61
|
+
|
|
62
|
+
# Weights for the hotspot score (spec Part 6). The score is the sum of seven
# risk components, each capped at 20, minus a confidence term capped at 10:
#
#   hotspot_score =
#       structural_risk + runtime_risk + authority_risk + duplication_score
#       + failure_frequency + test_gap + churn - confidence
#
# Entries commented as NEW were introduced in Wave A / Agent 3 to stop test
# files from being over-scored and to sharpen the authority/structural signals.
HOTSPOT_WEIGHTS: dict = {
    # Per-component ceilings (as in the spec).
    "structural_risk_max": 20,
    "runtime_risk_max": 20,
    "authority_risk_max": 20,
    "duplication_score_max": 20,
    "failure_frequency_max": 20,
    "test_gap_max": 20,
    "churn_max": 20,
    "confidence_penalty_max": 10,
    # Points per structural tag.
    "structural_tags": {
        "large_file": 10,
        "high_fan_in": 8,  # NEW: raised from 5 -- fan-in is a critical structural signal
        "high_fan_out": 3,
        "cycle_member": 5,  # NEW: raised from 2 -- cycle membership means elevated risk
        "unparseable": 0,  # NEW: parse errors earn no score bonus
    },
    # Points per runtime tag (unchanged).
    "runtime_tags": {
        "import_time_side_effects": 8,
        "background_task": 5,
        "decorator_registry": 3,
    },
    # Authority risk tiers (NEW: conflict-aware, replacing a flat +15).
    "authority_risk_base": 5,  # canonical_owner with no open conflicts
    "authority_risk_with_conflict": 20,  # canonical_owner with any open conflict
    "authority_writer_in_conflict": 10,  # file is a source in an open conflict
    # Adjustment for test files (NEW): test_*.py / *_test.py.
    "test_file_penalty": -10,
    # Ceiling of the log-scale churn component (E2).
    "churn_cap": 20,
}
|
|
103
|
+
|
|
104
|
+
# (minimum score, recommended mode) pairs, highest threshold first. Each
# threshold is an inclusive lower bound; hotspot_mode_for_score returns the
# mode of the first pair whose threshold the score reaches.
_HOTSPOT_MODE_THRESHOLDS: list[tuple[int, str]] = [
    (90, "do_not_touch_without_runtime_trace"),
    (60, "forensic_first"),
    (30, "contained_refactor"),
    (0, "safe_refactor"),
]
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Exclusion set for iter_py_files
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
# Directory names that are pruned wherever they appear in the walked tree.
# SYSTEM/libs cannot be expressed as a single directory name, so it is handled
# by the subtree-prefix list instead.
_DEFAULT_EXCLUSIONS: frozenset[str] = frozenset((
    # VCS metadata, virtualenvs, caches and build output
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "node_modules",
    ".tox",
    "build",
    "dist",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    # Vendored dependency trees / packaging artifacts that may be checked in
    # outside a venv; skipping them keeps the file-count guard and the map
    # build away from third-party code.
    "site-packages",
    "dist-packages",
    ".eggs",
    ".next",
    # Map outputs and seeds themselves
    ".cortex",
    # Tool / agent configuration directories
    ".claude",  # Claude Code config + agent worktrees
    ".codex",  # Codex cache directory
    ".prompt-engineer",  # PE Supervisor docs (roadmap.md, AGREEMENTS.md)
    ".a1",  # Task manager artifacts (tasks.json, plans/{id}.md)
))
|
|
146
|
+
|
|
147
|
+
# Project-relative posix path prefixes whose whole subtree is skipped, for
# cases a bare directory name in _DEFAULT_EXCLUSIONS cannot express.
# SYSTEM/libs is Vigil's vendor bundle: in user projects the path simply does
# not exist, and for Vigil self-diagnosis it is correctly left out.
_EXCLUDED_SUBTREE_PREFIXES: tuple[str, ...] = (
    "SYSTEM/libs/",
    ".claude/",  # whole .claude subtree (worktrees, plans, memory)
    ".codex/",  # Codex cache directory
)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
# Helpers
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
def classify_file_role(rel_posix: str) -> str:
    """Classify a path as 'fixture', 'test', 'generated' or 'production'.

    Matching is case-insensitive and accepts both ``/`` and ``\\`` separators.
    Categories are checked in the order fixture -> test -> generated, and the
    first match wins; anything unmatched is 'production'.

    Args:
        rel_posix: Path of the file, normally relative to the project root.

    Returns:
        One of ``"fixture"``, ``"test"``, ``"generated"``, ``"production"``.
    """
    # Anchor the path with a leading "/" so directory patterns such as
    # "/tests/" also match a top-level "tests/..." and "/conftest.py" matches
    # a conftest.py at the project root.
    normalized = "/" + rel_posix.lower().replace("\\", "/").lstrip("/")

    # Fixture patterns take precedence over test patterns.
    fixture_markers = ("/fixtures/", "/snapshots/", "/mocks/")
    if any(marker in normalized for marker in fixture_markers):
        return "fixture"

    # "/test_" requires the prefix to start a path component, so names that
    # merely contain "test_" (latest_results.py, contest_utils.py) stay
    # classified as production code.
    test_markers = ("/tests/", "/test/", "/test_", "_test.py", "/conftest.py")
    if any(marker in normalized for marker in test_markers):
        return "test"

    generated_markers = ("/generated/", "/migrations/", "/alembic/")
    if any(marker in normalized for marker in generated_markers):
        return "generated"

    return "production"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def iter_source_files(
    project_dir: Path,
    languages: Sequence[str] | None = None,
    include_roots: Sequence[str] | None = None,
) -> Iterator[Path]:
    """Yield absolute paths to source files handled by registered adapters.

    Args:
        project_dir: Root of the target project to scan. It is resolved
            before walking, so yielded paths are absolute.
        languages: Optional list of language names (e.g. ``["python"]``) to
            restrict the scan to. If None, all extensions registered in
            ``ADAPTERS`` are included. Unknown language names silently
            contribute zero extensions; if no extension remains, nothing is
            yielded.
        include_roots: Optional list of subdirectory names relative to
            project_dir to restrict the scan to. Entries that are not
            existing directories are skipped. If None (default), the
            entire project_dir is walked (minus exclusions).

    Exclusions applied to everything found below a walked root:
        - Directories whose name is in ``_DEFAULT_EXCLUSIONS`` (.git,
          __pycache__, virtualenvs, node_modules, build/dist output, tool
          caches, .cortex, agent config directories, ...).
        - Directories whose project-relative posix path starts with an entry
          of ``_EXCLUDED_SUBTREE_PREFIXES`` (e.g. the SYSTEM/libs/ vendor
          bundle, which simply does not exist in most user projects).
        - Files whose resolved path lies outside project_dir (symlink escape).

    Each file is yielded once, even when ``include_roots`` entries overlap or
    repeat, and the output is sorted for deterministic ordering.

    Notes:
        Pass ``languages=["python"]`` to restrict the scan to Python sources
        (see ``iter_py_files``).
    """
    # Local import: os is not imported at module level in this file.
    import os

    # Import here to avoid circular import (source_adapters imports map_common
    # indirectly through builder utilities in L2+; keeping deferred is safe).
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    if languages is None:
        target_exts: frozenset[str] = frozenset(ADAPTERS.keys())
    else:
        target_exts = frozenset(
            ext
            for ext, adapter in ADAPTERS.items()
            if adapter.language in languages
        )

    if not target_exts:
        _log.debug(
            "iter_source_files: no registered extensions for languages=%r -- returning empty",
            languages,
        )
        return

    project_dir = project_dir.resolve()

    if include_roots is None:
        roots: list[Path] = [project_dir]
    else:
        roots = []
        for root_name in include_roots:
            root = project_dir / root_name
            if not root.is_dir():
                _log.debug(
                    "iter_source_files: include_root missing, skipping: %s", root_name
                )
                continue
            roots.append(root)

    # A set, so that overlapping include_roots (e.g. "src" and "src/pkg") do
    # not report the same file twice.
    found: set[Path] = set()
    for root in roots:
        for dirpath_str, dirnames, filenames in os.walk(str(root), topdown=True):
            dirpath = Path(dirpath_str)
            try:
                rel_dir = dirpath.relative_to(project_dir)
            except ValueError:
                rel_dir = Path()

            # Prune in place (topdown walk) so excluded subtrees are never
            # entered: first by directory name, then by relative-path prefix.
            dirnames[:] = [
                d
                for d in dirnames
                if d not in _DEFAULT_EXCLUSIONS
                and not ((rel_dir / d).as_posix() + "/").startswith(
                    _EXCLUDED_SUBTREE_PREFIXES
                )
            ]

            for fname in filenames:
                src_file = dirpath / fname
                if not src_file.is_file():
                    continue
                if src_file.suffix.lower() not in target_exts:
                    continue

                # Resolve path (no strict -- file may be a symlink target)
                resolved = src_file.resolve(strict=False)

                # Skip if not inside project_dir (symlink escape)
                try:
                    resolved.relative_to(project_dir)
                except ValueError:
                    _log.debug(
                        "iter_source_files: skipping symlink escape: %s", src_file
                    )
                    continue

                found.add(src_file)

    results = sorted(found)
    _log.debug(
        "iter_source_files: found %d files (languages=%r) under %s",
        len(results),
        languages,
        project_dir,
    )
    yield from results
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def iter_py_files(
    project_dir: Path,
    include_roots: Sequence[str] | None = None,
) -> Iterator[Path]:
    """Yield the Python source files found under project_dir.

    Backward-compatible alias: simply delegates to ``iter_source_files`` with
    ``languages=["python"]``, so the exclusion rules, symlink-escape check and
    sorted ordering are exactly those of ``iter_source_files``.

    Args:
        project_dir: Root of the target project to scan.
        include_roots: Optional list of subdirectory names relative to
            project_dir to restrict the scan to. If None (default), the
            entire project_dir is walked (minus exclusions).
    """
    python_only = ["python"]
    for py_path in iter_source_files(
        project_dir,
        languages=python_only,
        include_roots=include_roots,
    ):
        yield py_path
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def make_metadata(
    source: str,
    confidence: float,
    status: str,
    evidence: tuple[str, ...] = (),
) -> dict:
    """Assemble a MapMetadata-compatible dict stamped with the current UTC time.

    Args:
        source: Stored under the ``"source"`` key.
        confidence: Stored under the ``"confidence"`` key.
        status: Stored under the ``"status"`` key.
        evidence: Evidence strings; copied into a new list.

    Returns:
        Dict with keys ``source``, ``evidence``, ``confidence``, ``freshness``
        and ``status``. ``freshness`` is an ISO-8601 UTC timestamp whose
        ``+00:00`` offset is written as ``Z``.
    """
    utc_now = datetime.now(timezone.utc)
    # isoformat() spells the UTC offset as "+00:00"; use the "Z" form instead.
    stamp = utc_now.isoformat().replace("+00:00", "Z")
    return dict(
        source=source,
        evidence=list(evidence),
        confidence=confidence,
        freshness=stamp,
        status=status,
    )
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def hotspot_mode_for_score(score: int) -> str:
    """Map a hotspot score to its recommended_mode string.

    ``_HOTSPOT_MODE_THRESHOLDS`` is scanned in its stored order and the mode
    of the first entry whose threshold the score reaches (``score >=
    threshold``) is returned: >=90 -> do_not_touch_without_runtime_trace,
    >=60 -> forensic_first, >=30 -> contained_refactor, >=0 -> safe_refactor.
    A score below every threshold also yields ``"safe_refactor"``.
    """
    matching_modes = (
        mode
        for threshold, mode in _HOTSPOT_MODE_THRESHOLDS
        if score >= threshold
    )
    return next(matching_modes, "safe_refactor")
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def get_file_inventory_cache(project_dir: Path) -> "Any":
    """Create the persistent (L2) parse cache for a project.

    Convenience wrapper that lets callers get a ``ParseCacheL2`` without
    importing ``parse_cache`` themselves; the import is deferred to call time.

    Args:
        project_dir: Passed unchanged to the ``ParseCacheL2`` constructor.

    Returns:
        A new ``ParseCacheL2`` instance. Per the original documentation its
        cache directory lives under ``<project_dir>/.cortex/.map_cache/``;
        that location is decided inside ``parse_cache``.
    """
    from .parse_cache import ParseCacheL2  # noqa: PLC0415

    l2_cache = ParseCacheL2(project_dir)
    return l2_cache
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def update_file_inventory_cache(project_dir: Path, cache: "Any") -> None:
    """Flush the file inventory cache after a build.

    Calls ``cache.flush()`` when a cache is supplied and does nothing when
    ``cache`` is None. Callers should invoke this after every build so any
    later cleanup logic can be added in one place.

    Args:
        project_dir: Currently unused; kept as part of the call signature.
        cache: Object exposing ``flush()`` (e.g. the value returned by
            ``get_file_inventory_cache``), or None.
    """
    if cache is None:
        return
    cache.flush()
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def get_coverage_metadata(builder_name: str) -> dict:
    """Report which registered languages a given builder supports.

    Args:
        builder_name: one of 'structural', 'runtime', 'data_contract',
            'authority' ('contract' is accepted as alias for 'data_contract').
            Any other name yields an empty 'supported_languages' list.

    Returns:
        dict with two keys:

        - 'supported_languages': sorted, de-duplicated language names of the
          adapters whose capability flag for this builder is truthy.
        - 'feature_matrix': language -> flags, independent of builder_name.
          'imports' mirrors ``supports_structural``, 'contracts'
          ``supports_contracts``, 'runtime' ``supports_runtime_signals`` and
          'writes' ``supports_authority_writes``. When several extensions map
          to one language, the adapter of the alphabetically first extension
          supplies the flags.

        Illustrative shape (actual values come from the adapters):
            {
                'supported_languages': ['python', 'typescript'],
                'feature_matrix': {
                    'python': {'imports': True, 'contracts': True, 'runtime': True, 'writes': True},
                    ...
                }
            }
    """
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    # 'data_contract' is the primary name; 'contract' is its alias.
    builder_key = builder_name
    if builder_name in ('data_contract', 'contract'):
        builder_key = 'contract'

    # Adapter attribute holding the capability flag for each builder.
    capability_attr = {
        'structural': 'supports_structural',
        'runtime': 'supports_runtime_signals',
        'contract': 'supports_contracts',
        'authority': 'supports_authority_writes',
    }.get(builder_key)

    # language -> {feature: supported}; the first adapter seen per language wins.
    feature_matrix = {}
    for _ext, adapter in sorted(ADAPTERS.items()):
        lang = adapter.language
        if lang in feature_matrix:
            continue
        feature_matrix[lang] = {
            'imports': adapter.supports_structural,
            'contracts': adapter.supports_contracts,
            'runtime': adapter.supports_runtime_signals,
            'writes': adapter.supports_authority_writes,
        }

    if capability_attr is None:
        # Unknown builder: no language is reported as supported.
        supported = []
    else:
        supported = sorted(
            {
                adapter.language
                for adapter in ADAPTERS.values()
                if getattr(adapter, capability_attr)
            }
        )

    return {
        'supported_languages': supported,
        'feature_matrix': feature_matrix,
    }
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Custom exceptions for the map builder subsystem.
|
|
2
|
+
|
|
3
|
+
Error taxonomy per plan sec.8. All errors are fail-loud -- no silent failures.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import logging
|
|
7
|
+
_log = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"MapBuilderError",
|
|
11
|
+
"MapSchemaError",
|
|
12
|
+
"MapStorageError",
|
|
13
|
+
"MapConcurrencyError",
|
|
14
|
+
"MapIntegrityError",
|
|
15
|
+
"RuntimeTracerError",
|
|
16
|
+
"RuntimeTracerTimeoutError",
|
|
17
|
+
"MapSecurityError",
|
|
18
|
+
"MapBuildConflictBudgetExceeded",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MapBuilderError(Exception):
    """Root of the map builder exception hierarchy.

    Every other error in this module derives from it, so catching
    ``MapBuilderError`` catches them all.
    """


class MapSchemaError(MapBuilderError):
    """Raised for a schema version mismatch or an unknown schema."""


class MapStorageError(MapBuilderError):
    """Raised when I/O or an atomic write fails."""


class MapConcurrencyError(MapStorageError):
    """Raised on filelock timeout: a concurrent writer held the lock too long.

    Being a ``MapStorageError``, it is also caught by storage-error handlers.
    """


class MapIntegrityError(MapBuilderError):
    """Raised when a map content invariant is broken.

    Examples: a missing required field, corrupt data.
    """


class RuntimeTracerError(MapBuilderError):
    """Raised when the subprocess tracer fails."""


class RuntimeTracerTimeoutError(RuntimeTracerError):
    """Raised when the subprocess tracer exceeds its time budget."""


class MapSecurityError(MapBuilderError):
    """Raised on a path traversal attempt or other security violation."""


class MapBuildConflictBudgetExceeded(MapBuilderError):
    """Raised when the conflict count exceeds the allowed budget (default 500)."""
|