vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Runtime tracer orchestrator -- Map 2 subprocess-based startup capture.
|
|
2
|
+
|
|
3
|
+
Launches a target Python module in an isolated subprocess with sys.settrace
|
|
4
|
+
and __import__ hooks installed INSIDE that subprocess only (never in parent).
|
|
5
|
+
|
|
6
|
+
This module is the PARENT-side orchestrator only. It:
|
|
7
|
+
- Sanitises environment (strips known API keys and secrets).
|
|
8
|
+
- Spawns runtime_tracer_entry as a subprocess.
|
|
9
|
+
- Reads the JSON output file written by the subprocess.
|
|
10
|
+
- Returns structured trace results for merging into RuntimeNode map.
|
|
11
|
+
|
|
12
|
+
Security guarantees (per plan sec.7.2):
|
|
13
|
+
- BLOCKED_ENV vars are never passed to subprocess.
|
|
14
|
+
- shell=False always -- no shell injection possible.
|
|
15
|
+
- Timeout enforced via subprocess.run(timeout=...).
|
|
16
|
+
|
|
17
|
+
Public API:
|
|
18
|
+
capture_startup_trace(target_module, target_argv, project_dir, timeout_s)
|
|
19
|
+
-> dict with keys: events, import_events, exit_code, duration_s, stderr
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
import logging
|
|
25
|
+
import os
|
|
26
|
+
import sys
|
|
27
|
+
import tempfile
|
|
28
|
+
import time
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Sequence
|
|
31
|
+
|
|
32
|
+
from .map_errors import RuntimeTracerError, RuntimeTracerTimeoutError
|
|
33
|
+
|
|
34
|
+
__all__ = ["capture_startup_trace", "BLOCKED_ENV"]
|
|
35
|
+
|
|
36
|
+
_log = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Security: blocked environment variable names (plan §7.2)
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Exact names of environment variables that are withheld from the traced
# subprocess. _build_sanitised_env drops every key listed here from the copy
# of os.environ it builds for the child.
BLOCKED_ENV: frozenset[str] = frozenset(
    (
        # Hosted-model API keys.
        "ANTHROPIC_API_KEY",
        "GEMINI_API_KEY",
        "OPENAI_API_KEY",
        "PERPLEXITY_API_KEY",
        # Source-hosting and cloud credentials.
        "GITHUB_TOKEN",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        # SSH agent socket.
        "SSH_AUTH_SOCK",
    )
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# Helpers
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
def _build_sanitised_env(project_dir: Path | None) -> dict[str, str]:
    """Build the environment mapping handed to the tracer subprocess.

    The result is a copy of ``os.environ`` without any key listed in
    ``BLOCKED_ENV``, with ``VIGIL_MAPPER_TRACE`` set to ``"1"``. When
    *project_dir* is given, its resolved path is placed at the front of
    ``PYTHONPATH`` (ahead of any inherited value).
    """
    child_env: dict[str, str] = {}
    for name, value in os.environ.items():
        if name in BLOCKED_ENV:
            continue
        child_env[name] = value

    # Marker the tracer entry point checks before it agrees to run.
    child_env["VIGIL_MAPPER_TRACE"] = "1"

    if project_dir is None:
        return child_env

    inherited = child_env.get("PYTHONPATH", "")
    root = str(project_dir.resolve())
    child_env["PYTHONPATH"] = (root + os.pathsep + inherited) if inherited else root
    return child_env
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _build_argv(
|
|
75
|
+
target_module: str,
|
|
76
|
+
temp_path: str,
|
|
77
|
+
timeout_s: float,
|
|
78
|
+
target_argv: Sequence[str],
|
|
79
|
+
) -> list[str]:
|
|
80
|
+
"""Build subprocess argv list (shell=False safe, no injection possible)."""
|
|
81
|
+
argv = [
|
|
82
|
+
sys.executable,
|
|
83
|
+
"-m",
|
|
84
|
+
"vigil_mapper.runtime_tracer_entry",
|
|
85
|
+
"--target", target_module,
|
|
86
|
+
"--out", temp_path,
|
|
87
|
+
"--timeout-s", str(timeout_s),
|
|
88
|
+
]
|
|
89
|
+
if target_argv:
|
|
90
|
+
argv += ["--", *target_argv]
|
|
91
|
+
return argv
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Public API
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
def capture_startup_trace(
    target_module: str,
    target_argv: Sequence[str] = (),
    project_dir: Path | None = None,
    timeout_s: float = 30.0,
) -> dict:
    """Capture startup trace of target_module by running it in a subprocess.

    The subprocess installs sys.settrace and a __import__ hook, runs the
    target, then writes a JSON file with all captured events. The temporary
    output file is removed on every exit path, including launch failures.

    Args:
        target_module: Dotted Python module name to run (e.g. "json" or
            "mypackage.app").
        target_argv: Arguments forwarded to the target module's sys.argv.
        project_dir: If provided, prepended to subprocess PYTHONPATH so the
            target module can be imported.
        timeout_s: Maximum seconds to allow the subprocess to run. Hard kill
            at timeout_s + 5 seconds via subprocess.run timeout param.

    Returns:
        dict with keys:
            events        - list[dict]: call-level trace events
            import_events - list[dict]: import hook events
            exit_code     - int: 0 on success; on a non-zero subprocess exit
                            this is the subprocess return code
            duration_s    - float: elapsed time reported by the subprocess,
                            falling back to the parent's wall-clock time
            stderr        - str: subprocess stderr output

        A non-zero subprocess exit does not raise: whatever events were
        written are returned so the caller can decide how to degrade.

    Raises:
        RuntimeTracerTimeoutError: If subprocess does not complete within
            timeout_s + 5 seconds.
        RuntimeTracerError: If the tracer entry exits 0 but its output file
            is unreadable, is not a JSON object, or is empty.
    """
    import subprocess

    if project_dir is not None:
        project_dir = project_dir.resolve()

    env = _build_sanitised_env(project_dir)

    # The entry point receives timeout_s as its soft budget; the parent
    # enforces the hard limit a few seconds later.
    hard_timeout_s = timeout_s + 5.0

    # Create the temp file, close our descriptor immediately and pass only the
    # path to the subprocess, which writes its JSON there.
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="vigil_trace_")
    os.close(tmp_fd)

    payload: dict = {}
    read_error: str | None = None
    try:
        argv = _build_argv(target_module, tmp_path, timeout_s, target_argv)

        _log.info(
            "capture_startup_trace: spawning subprocess for target=%r timeout=%.1fs",
            target_module,
            timeout_s,
        )
        t_wall = time.perf_counter()

        try:
            proc = subprocess.run(
                argv,
                env=env,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=hard_timeout_s,
                shell=False,
            )
        except subprocess.TimeoutExpired as exc:
            # A partial output file is not usable; the outer finally removes it.
            _log.error(
                "capture_startup_trace: subprocess timed out after %.1fs for target=%r",
                hard_timeout_s,
                target_module,
            )
            raise RuntimeTracerTimeoutError(
                "runtime tracer timed out after %.1fs for target %r" % (hard_timeout_s, target_module)
            ) from exc

        wall_elapsed = time.perf_counter() - t_wall

        # Read the output while the file still exists. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError from a corrupt file.
        try:
            raw = Path(tmp_path).read_text(encoding="utf-8")
            if raw.strip():
                loaded = json.loads(raw)
                if isinstance(loaded, dict):
                    payload = loaded
                else:
                    read_error = "top-level JSON value is %s, expected an object" % type(loaded).__name__
        except (OSError, ValueError) as exc:
            read_error = str(exc)
    finally:
        # Best-effort cleanup on every path: success, timeout, launch failure.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    if proc.returncode != 0:
        _log.error(
            "capture_startup_trace: subprocess exited with code %d for target=%r stderr=%r",
            proc.returncode,
            target_module,
            proc.stderr[:500] if proc.stderr else "",
        )
        # Return partial result (events captured so far) — caller decides degraded mode.
        return {
            "events": payload.get("events", []),
            "import_events": payload.get("import_events", []),
            "exit_code": proc.returncode,
            "duration_s": payload.get("duration_s", wall_elapsed),
            "stderr": proc.stderr or "",
        }

    if read_error is not None:
        raise RuntimeTracerError(
            "runtime tracer for target %r exited 0 but output file unreadable: %s"
            % (target_module, read_error)
        )

    if not payload:
        raise RuntimeTracerError(
            "runtime tracer for target %r exited 0 but produced empty JSON output"
            % target_module
        )

    _log.debug(
        "capture_startup_trace: done exit=0 wall=%.2fs events=%d imports=%d",
        wall_elapsed,
        len(payload.get("events", [])),
        len(payload.get("import_events", [])),
    )

    return {
        "events": payload.get("events", []),
        "import_events": payload.get("import_events", []),
        "exit_code": payload.get("exit_code", 0),
        "duration_s": payload.get("duration_s", wall_elapsed),
        "stderr": proc.stderr or "",
    }
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Subprocess entrypoint for runtime startup tracing.
|
|
2
|
+
|
|
3
|
+
CRITICAL: This file must NEVER be imported by the parent process or any
|
|
4
|
+
other module. It is designed to run only as a __main__ subprocess via:
|
|
5
|
+
python -m vigil_mapper.runtime_tracer_entry
|
|
6
|
+
|
|
7
|
+
It installs sys.settrace and a __import__ hook, runs the target module,
|
|
8
|
+
captures all call events and import events, and writes a JSON file to
|
|
9
|
+
the path given by --out.
|
|
10
|
+
|
|
11
|
+
Safety net: refuses to run unless VIGIL_MAPPER_TRACE=1 is set
|
|
12
|
+
in the environment, preventing accidental in-process execution.
|
|
13
|
+
|
|
14
|
+
Do NOT import this module. Do NOT add it to __all__ in __init__.py.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import builtins
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import runpy
|
|
23
|
+
import sys
|
|
24
|
+
import time
|
|
25
|
+
from typing import Any
|
|
26
|
+
import logging
|
|
27
|
+
_log = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def main() -> int:
    """Run the target module under call/import tracing and write the result as JSON.

    Options are read from ``sys.argv``: ``--target`` (dotted module to run),
    ``--out`` (path of the JSON file to write) and ``--timeout-s`` (parsed,
    but not used in this function). Everything after a literal ``--`` is
    handed to the target as its own argv.

    Returns:
        2 if ``VIGIL_MAPPER_TRACE`` is not ``"1"`` (refuses to run);
        3 if the output file could not be written;
        otherwise 0 when the target finished with exit code 0, and 2 for any
        other target outcome (non-zero ``SystemExit`` or an exception).
    """
    # ------------------------------------------------------------------
    # Safety net: require VIGIL_MAPPER_TRACE=1.
    # This prevents accidental execution in-process by parent code that
    # accidentally imports this module.
    # ------------------------------------------------------------------
    if os.environ.get("VIGIL_MAPPER_TRACE") != "1":
        print(
            "ERROR: runtime_tracer_entry must only run as a subprocess with "
            "VIGIL_MAPPER_TRACE=1 set. Refusing to execute.",
            file=sys.stderr,
        )
        return 2

    # ------------------------------------------------------------------
    # Argument parsing.
    # Everything after `--` belongs to the target, so it is split off by
    # hand before argparse sees the remaining options.
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Subprocess entrypoint for cortex runtime tracer.",
        add_help=True,
    )
    parser.add_argument("--target", required=True, help="Dotted module name to run.")
    parser.add_argument("--out", required=True, help="Path to write JSON output.")
    parser.add_argument(
        "--timeout-s",
        type=float,
        default=30.0,
        help="Soft time budget for the target (informational only here).",
    )

    raw_args = sys.argv[1:]
    target_argv_start: list[str] = []
    if "--" in raw_args:
        sep_idx = raw_args.index("--")
        target_argv_start = raw_args[sep_idx + 1:]
        raw_args = raw_args[:sep_idx]

    args = parser.parse_args(raw_args)
    target_module: str = args.target
    out_path: str = args.out

    # ------------------------------------------------------------------
    # Local accumulators (NOT module-level globals — plan ban).
    # ------------------------------------------------------------------
    events: list[dict[str, Any]] = []
    import_events: list[dict[str, Any]] = []
    t0 = time.perf_counter()

    # ------------------------------------------------------------------
    # Save original trace/import hooks so we can restore them in finally.
    # ------------------------------------------------------------------
    orig_settrace = sys.gettrace()
    orig_import = builtins.__import__

    exit_code: int = 0
    exception_info: dict[str, str] | None = None

    # ------------------------------------------------------------------
    # Skip-list for trace events: a call is not recorded when its filename
    # contains any of these fragments.
    # ------------------------------------------------------------------
    _skip_fragments = (
        os.sep + "libs" + os.sep,
        "__pycache__",
        "frozen importlib",
        "importlib",
    )

    def _should_skip_filename(fname: str) -> bool:
        """Return True for empty, synthetic (``<...>``) or skip-listed filenames."""
        if not fname:
            return True
        if fname.startswith("<"):
            return True
        for frag in _skip_fragments:
            if frag in fname:
                return True
        return False

    # ------------------------------------------------------------------
    # sys.settrace call tracer: records one event per "call".
    # ------------------------------------------------------------------
    def tracefunc(frame: Any, event: str, arg: Any) -> Any:
        if event == "call":
            code = frame.f_code
            fname = code.co_filename
            if not _should_skip_filename(fname):
                # co_qualname is not present on every interpreter version;
                # fall back to the plain function name.
                qualname = getattr(code, "co_qualname", code.co_name)
                events.append({
                    "event": "call",
                    "qualname": qualname,
                    "filename": fname,
                    "lineno": frame.f_lineno,
                    "ts": time.perf_counter() - t0,
                })
        return tracefunc

    # ------------------------------------------------------------------
    # __import__ hook: records the requested name, then delegates.
    # ------------------------------------------------------------------
    def traced_import(name: str, *import_args: Any, **import_kwargs: Any) -> Any:
        import_events.append({
            "event": "import",
            "module": name,
            "ts": time.perf_counter() - t0,
        })
        return orig_import(name, *import_args, **import_kwargs)

    # ------------------------------------------------------------------
    # Run the target module with hooks installed.
    # ------------------------------------------------------------------
    sys.settrace(tracefunc)
    builtins.__import__ = traced_import

    try:
        # Set sys.argv so the target module sees appropriate argv.
        sys.argv = [target_module] + list(target_argv_start)
        runpy.run_module(target_module, run_name="__main__", alter_sys=True)
        exit_code = 0

    except SystemExit as exc:
        # Normal CLI exit — not a failure.
        code = exc.code
        if code is None:
            exit_code = 0
        elif isinstance(code, int):
            exit_code = code
        else:
            # SystemExit with a string message → treat as error (code 1).
            exit_code = 1

    except Exception as exc:  # noqa: BLE001 -- intentional broad catch for target
        exit_code = 2
        exception_info = {
            "type": type(exc).__name__,
            "message": str(exc),
        }

    finally:
        # Always restore hooks — plan §4b critical requirement. Put back the
        # trace function that was active on entry rather than clearing it.
        sys.settrace(orig_settrace)
        builtins.__import__ = orig_import

    # ------------------------------------------------------------------
    # Write JSON output to the file specified by --out.
    # ------------------------------------------------------------------
    duration_s = time.perf_counter() - t0
    output: dict[str, Any] = {
        "events": events,
        "import_events": import_events,
        "exit_code": exit_code,
        "duration_s": duration_s,
    }
    if exception_info is not None:
        output["exception"] = exception_info

    try:
        out_text = json.dumps(output, ensure_ascii=False)
        with open(out_path, "w", encoding="utf-8") as fh:
            fh.write(out_text)
    except (OSError, ValueError) as exc:
        # ValueError includes UnicodeEncodeError, raised when a recorded
        # string cannot be encoded as UTF-8 (e.g. lone surrogates).
        print("ERROR: failed to write output file %r: %s" % (out_path, exc), file=sys.stderr)
        return 3

    # 0 only when the target's exit code was 0; every other outcome maps to 2.
    return 0 if exit_code == 0 else 2
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Script entry: the integer returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Semantic diff for map JSON files, ignoring timestamp-like fields.
|
|
2
|
+
|
|
3
|
+
Used by rebuild worker (Phase E1) to decide whether to promote temp-dir
|
|
4
|
+
rebuild output to canonical location -- skip write if content unchanged
|
|
5
|
+
modulo ignored timestamps.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
__all__ = ["semantic_map_diff"]
|
|
15
|
+
|
|
16
|
+
_log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_IGNORED_FIELDS: frozenset[str] = frozenset({
|
|
19
|
+
# Build timestamps / envelope — never semantic content
|
|
20
|
+
"built_at",
|
|
21
|
+
"freshness",
|
|
22
|
+
"produced_by",
|
|
23
|
+
"build_duration_s",
|
|
24
|
+
"generated_at",
|
|
25
|
+
"map_name",
|
|
26
|
+
# Build-environment git metadata (hotspot map only — see cli_entry.py
|
|
27
|
+
# `_hotspot_churn_meta`). `git_head_sha` leaks into the payload when any
|
|
28
|
+
# git activity occurs between the two rebuild runs (commit, branch move,
|
|
29
|
+
# rebase). `churn_source` toggles between "git_log_numstat",
|
|
30
|
+
# "git_log_numstat_empty", and "skipped" depending on whether the index
|
|
31
|
+
# refresh picked up a git hiccup. `since_window` is stable for a given
|
|
32
|
+
# build config but belongs to the same metadata class and is therefore
|
|
33
|
+
# excluded for consistency. Excluding these three fields keeps the
|
|
34
|
+
# semantic-diff focused on map *content*, not build environment.
|
|
35
|
+
"git_head_sha",
|
|
36
|
+
"churn_source",
|
|
37
|
+
"since_window",
|
|
38
|
+
# Derived size of the serialized map file — jitters ±1 byte whenever
|
|
39
|
+
# build_duration_s crosses a decimal-digit boundary (e.g. "0.42" → "0.395").
|
|
40
|
+
# Recorded in 00_map_index.json per-map entry; not semantic payload.
|
|
41
|
+
"file_bytes",
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def semantic_map_diff(new_path: Path, old_path: Path) -> bool:
    """Return True if two map JSONs are semantically identical (ignoring timestamp fields).

    Both files are read as UTF-8 JSON, every key listed in ``_IGNORED_FIELDS``
    is stripped recursively, and the remaining structures are compared.

    Returns True (identical) if both parse and stripped structures equal.
    Returns False if either file is unreadable, is not valid UTF-8, is not
    valid JSON, or the content differs.
    Fail-safe: a read/decode/parse error -> False (treat as "changed",
    triggers write).
    """
    try:
        new_data = json.loads(new_path.read_text(encoding="utf-8"))
        old_data = json.loads(old_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text raises for bytes that are not valid UTF-8. Either
        # file may be the culprit, so both paths are logged.
        _log.debug(
            "semantic_map_diff: read/parse failed (%s): new=%s old=%s",
            type(exc).__name__,
            new_path,
            old_path,
        )
        return False  # treat as changed on any error
    return _strip_ignored(new_data) == _strip_ignored(old_data)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _strip_ignored(obj: Any) -> Any:
    """Return a copy of *obj* with every ``_IGNORED_FIELDS`` key removed.

    Lists are rebuilt element by element and dicts key by key, recursing
    into their values; any other value is returned unchanged.
    """
    if isinstance(obj, list):
        return [_strip_ignored(item) for item in obj]

    if not isinstance(obj, dict):
        return obj

    cleaned = {}
    for key, value in obj.items():
        if key in _IGNORED_FIELDS:
            continue
        cleaned[key] = _strip_ignored(value)
    return cleaned
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Source adapter registry and dispatch helpers.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
ADAPTERS -- dict mapping file extension -> SourceAdapter instance.
|
|
5
|
+
get_adapter_for_file -- return adapter for a given Path (by extension).
|
|
6
|
+
supported_extensions -- tuple of all currently registered extensions.
|
|
7
|
+
SourceAdapter -- Protocol for type-checking adapter compliance.
|
|
8
|
+
RegexAdapterBase -- Base class for regex-based adapters (L2+ languages).
|
|
9
|
+
IR signal classes -- ImportEdge, SymbolDef, ContractCandidate,
|
|
10
|
+
RuntimeSignal, AuthorityWriteCandidate.
|
|
11
|
+
|
|
12
|
+
Registry population (current state — all 5 languages registered):
|
|
13
|
+
Python (.py), TypeScript (.ts, .tsx), JavaScript (.js, .jsx),
|
|
14
|
+
Go (.go), Java (.java).
|
|
15
|
+
Historical note: L1 shipped Python only; L2 added TS/JS; L5 added Go/Java.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from ._base import RegexAdapterBase, SourceAdapter
|
|
23
|
+
from ._ir import (
|
|
24
|
+
AuthorityWriteCandidate,
|
|
25
|
+
ContractCandidate,
|
|
26
|
+
ImportEdge,
|
|
27
|
+
RuntimeSignal,
|
|
28
|
+
TSRuntimeSignal,
|
|
29
|
+
SymbolDef,
|
|
30
|
+
)
|
|
31
|
+
from .go import GoAdapter
|
|
32
|
+
from .java import JavaAdapter
|
|
33
|
+
from .javascript import JavascriptAdapter
|
|
34
|
+
from .python import PythonAdapter
|
|
35
|
+
from .typescript import TypescriptAdapter
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"ADAPTERS",
|
|
39
|
+
"get_adapter_for_file",
|
|
40
|
+
"supported_extensions",
|
|
41
|
+
"SourceAdapter",
|
|
42
|
+
"RegexAdapterBase",
|
|
43
|
+
"ImportEdge",
|
|
44
|
+
"SymbolDef",
|
|
45
|
+
"ContractCandidate",
|
|
46
|
+
"RuntimeSignal",
|
|
47
|
+
"TSRuntimeSignal",
|
|
48
|
+
"AuthorityWriteCandidate",
|
|
49
|
+
"PythonAdapter",
|
|
50
|
+
"TypescriptAdapter",
|
|
51
|
+
"JavascriptAdapter",
|
|
52
|
+
"GoAdapter",
|
|
53
|
+
"JavaAdapter",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
_log = logging.getLogger(__name__)
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Static adapter registry — keyed by lowercase file extension
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
# Populated at import time via _register(). Extensions must be lowercase.
|
|
63
|
+
ADAPTERS: dict[str, SourceAdapter] = {}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _register(adapter: SourceAdapter) -> None:
    """Add *adapter* to ``ADAPTERS`` under each of its declared extensions.

    Keys are the lowercased extensions. The extensions are processed in the
    order the adapter declares them; the first one that is already present in
    the registry raises ValueError, so an existing entry is never silently
    replaced.
    """
    adapter_name = adapter.__class__.__name__
    for declared in adapter.file_extensions:
        ext_key = declared.lower()
        if ext_key not in ADAPTERS:
            ADAPTERS[ext_key] = adapter
            _log.debug("source_adapters: registered %s for %r", adapter_name, ext_key)
            continue

        owner_name = ADAPTERS[ext_key].__class__.__name__
        raise ValueError(
            f"Duplicate adapter registration for extension {ext_key!r}: "
            f"existing={owner_name!r}, "
            f"new={adapter_name!r}"
        )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# All 5 adapters registered: Python, TypeScript, JavaScript, Go, Java.
|
|
85
|
+
# Instantiate and register one adapter per language, in this fixed order.
for _adapter_cls in (
    PythonAdapter,
    TypescriptAdapter,
    JavascriptAdapter,
    GoAdapter,
    JavaAdapter,
):
    _register(_adapter_cls())
del _adapter_cls  # keep the loop variable out of the module namespace
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# Dispatch helpers
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
def get_adapter_for_file(path: Path) -> SourceAdapter | None:
    """Look up the adapter registered for the extension of *path*.

    The suffix is lowercased before the lookup, so ``Path("FOO.PY")`` and
    ``Path("foo.py")`` resolve to the same adapter.

    Returns None when no adapter is registered for that extension.
    """
    extension = path.suffix.lower()
    return ADAPTERS.get(extension)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def supported_extensions() -> tuple[str, ...]:
    """List every extension that currently has an adapter, in sorted order."""
    ordered = sorted(ADAPTERS)
    return tuple(ordered)
|