vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,243 @@
1
+ """Runtime tracer orchestrator -- Map 2 subprocess-based startup capture.
2
+
3
+ Launches a target Python module in an isolated subprocess with sys.settrace
4
+ and __import__ hooks installed INSIDE that subprocess only (never in parent).
5
+
6
+ This module is the PARENT-side orchestrator only. It:
7
+ - Sanitises environment (strips known API keys and secrets).
8
+ - Spawns runtime_tracer_entry as a subprocess.
9
+ - Reads the JSON output file written by the subprocess.
10
+ - Returns structured trace results for merging into RuntimeNode map.
11
+
12
+ Security guarantees (per plan sec.7.2):
13
+ - BLOCKED_ENV vars are never passed to subprocess.
14
+ - shell=False always -- no shell injection possible.
15
+ - Timeout enforced via subprocess.run(timeout=...).
16
+
17
+ Public API:
18
+ capture_startup_trace(target_module, target_argv, project_dir, timeout_s)
19
+ -> dict with keys: events, import_events, exit_code, duration_s, stderr
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import logging
25
+ import os
26
+ import sys
27
+ import tempfile
28
+ import time
29
+ from pathlib import Path
30
+ from typing import Sequence
31
+
32
+ from .map_errors import RuntimeTracerError, RuntimeTracerTimeoutError
33
+
34
+ __all__ = ["capture_startup_trace", "BLOCKED_ENV"]
35
+
36
+ _log = logging.getLogger(__name__)
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Security: blocked environment variable names (plan §7.2)
40
+ # ---------------------------------------------------------------------------
41
+
42
# Environment variable names that are stripped from the environment handed to
# the traced subprocess (see _build_sanitised_env). Membership is exact-name.
BLOCKED_ENV: frozenset[str] = frozenset(
    (
        "ANTHROPIC_API_KEY",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "GEMINI_API_KEY",
        "GITHUB_TOKEN",
        "OPENAI_API_KEY",
        "PERPLEXITY_API_KEY",
        "SSH_AUTH_SOCK",
    )
)
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Helpers
56
+ # ---------------------------------------------------------------------------
57
+
58
def _build_sanitised_env(project_dir: Path | None) -> dict[str, str]:
    """Build the environment mapping for the tracer subprocess.

    Starts from the current ``os.environ``, drops every variable whose name is
    in ``BLOCKED_ENV``, and sets ``VIGIL_MAPPER_TRACE`` to ``"1"``.

    Args:
        project_dir: When not None, its resolved path is placed at the front
            of ``PYTHONPATH`` (ahead of any inherited value).

    Returns:
        A new dict; ``os.environ`` itself is not modified.
    """
    child_env: dict[str, str] = {}
    for name, value in os.environ.items():
        if name in BLOCKED_ENV:
            continue
        child_env[name] = value
    child_env["VIGIL_MAPPER_TRACE"] = "1"

    if project_dir is None:
        return child_env

    inherited = child_env.get("PYTHONPATH", "")
    root = str(project_dir.resolve())
    # Keep the inherited search path, but let the project directory win.
    child_env["PYTHONPATH"] = (root + os.pathsep + inherited) if inherited else root
    return child_env
72
+
73
+
74
def _build_argv(
    target_module: str,
    temp_path: str,
    timeout_s: float,
    target_argv: Sequence[str],
) -> list[str]:
    """Assemble the argument list used to launch the tracer entry module.

    The list starts with the current interpreter and ``-m
    vigil_mapper.runtime_tracer_entry``, followed by the ``--target``,
    ``--out`` and ``--timeout-s`` options. When *target_argv* is non-empty it
    is appended after a ``--`` separator; otherwise no separator is emitted.
    """
    forwarded = ["--", *target_argv] if target_argv else []
    return [
        sys.executable,
        "-m",
        "vigil_mapper.runtime_tracer_entry",
        "--target",
        target_module,
        "--out",
        temp_path,
        "--timeout-s",
        str(timeout_s),
        *forwarded,
    ]
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Public API
96
+ # ---------------------------------------------------------------------------
97
+
98
def capture_startup_trace(
    target_module: str,
    target_argv: Sequence[str] = (),
    project_dir: Path | None = None,
    timeout_s: float = 30.0,
) -> dict:
    """Capture the startup trace of *target_module* by running it in a subprocess.

    The subprocess (``vigil_mapper.runtime_tracer_entry``) runs the target and
    writes a JSON file with the captured events to a temporary path, which is
    read back here. The temporary file is removed on every exit path,
    including when spawning the subprocess fails.

    Args:
        target_module: Dotted Python module name to run (e.g. "json" or
            "mypackage.app").
        target_argv: Arguments forwarded to the target module's sys.argv.
        project_dir: If provided, prepended to subprocess PYTHONPATH so the
            target module can be imported.
        timeout_s: Time budget passed to the subprocess. The subprocess is
            killed by ``subprocess.run`` at ``timeout_s + 5`` seconds.

    Returns:
        dict with keys:
            events        - list[dict]: call-level trace events
            import_events - list[dict]: import hook events
            exit_code     - int: exit code (0 = normal); on a non-zero
                            subprocess exit this is the subprocess return code
            duration_s    - float: duration reported by the subprocess, or the
                            wall-clock time measured here if it reported none
            stderr        - str: subprocess stderr output

    Raises:
        RuntimeTracerTimeoutError: If the subprocess does not complete within
            timeout_s + 5 seconds.
        RuntimeTracerError: If the subprocess exits 0 but its output file is
            unreadable, is not a JSON object, or is empty.
    """
    import subprocess

    if project_dir is not None:
        project_dir = project_dir.resolve()

    env = _build_sanitised_env(project_dir)

    # Only the path is needed: the subprocess opens the file itself, so the
    # descriptor returned by mkstemp is closed straight away.
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="vigil_trace_")
    os.close(tmp_fd)

    hard_timeout_s = timeout_s + 5.0
    payload: dict = {}
    read_error: str | None = None

    # Everything that can fail after the temp file exists lives inside this
    # try, so the finally below is the single place that removes the file.
    try:
        argv = _build_argv(target_module, tmp_path, timeout_s, target_argv)

        _log.info(
            "capture_startup_trace: spawning subprocess for target=%r timeout=%.1fs",
            target_module,
            timeout_s,
        )
        t_wall = time.perf_counter()

        try:
            proc = subprocess.run(
                argv,
                env=env,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=hard_timeout_s,
                shell=False,
            )
        except subprocess.TimeoutExpired as exc:
            _log.error(
                "capture_startup_trace: subprocess timed out after %.1fs for target=%r",
                hard_timeout_s,
                target_module,
            )
            raise RuntimeTracerTimeoutError(
                "runtime tracer timed out after %.1fs for target %r" % (hard_timeout_s, target_module)
            ) from exc

        wall_elapsed = time.perf_counter() - t_wall

        # Read the subprocess output. Failures are recorded rather than raised
        # so that a non-zero exit can still return a partial result below.
        try:
            raw = Path(tmp_path).read_text(encoding="utf-8")
            if raw.strip():
                decoded = json.loads(raw)
                if isinstance(decoded, dict):
                    payload = decoded
                else:
                    read_error = "expected a JSON object, got %s" % type(decoded).__name__
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            read_error = str(exc)
    finally:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    if proc.returncode != 0:
        _log.error(
            "capture_startup_trace: subprocess exited with code %d for target=%r stderr=%r",
            proc.returncode,
            target_module,
            proc.stderr[:500] if proc.stderr else "",
        )
        # Return partial result (events captured so far) -- caller decides degraded mode.
        return {
            "events": payload.get("events", []),
            "import_events": payload.get("import_events", []),
            "exit_code": proc.returncode,
            "duration_s": payload.get("duration_s", wall_elapsed),
            "stderr": proc.stderr or "",
        }

    if read_error is not None:
        raise RuntimeTracerError(
            "runtime tracer for target %r exited 0 but output file unreadable: %s"
            % (target_module, read_error)
        )

    if not payload:
        raise RuntimeTracerError(
            "runtime tracer for target %r exited 0 but produced empty JSON output"
            % target_module
        )

    _log.debug(
        "capture_startup_trace: done exit=0 wall=%.2fs events=%d imports=%d",
        wall_elapsed,
        len(payload.get("events", [])),
        len(payload.get("import_events", [])),
    )

    return {
        "events": payload.get("events", []),
        "import_events": payload.get("import_events", []),
        "exit_code": payload.get("exit_code", 0),
        "duration_s": payload.get("duration_s", wall_elapsed),
        "stderr": proc.stderr or "",
    }
@@ -0,0 +1,199 @@
1
+ """Subprocess entrypoint for runtime startup tracing.
2
+
3
+ CRITICAL: This file must NEVER be imported by the parent process or any
4
+ other module. It is designed to run only as a __main__ subprocess via:
5
+ python -m vigil_mapper.runtime_tracer_entry
6
+
7
+ It installs sys.settrace and a __import__ hook, runs the target module,
8
+ captures all call events and import events, and writes a JSON file to
9
+ the path given by --out.
10
+
11
+ Safety net: refuses to run unless VIGIL_MAPPER_TRACE=1 is set
12
+ in the environment, preventing accidental in-process execution.
13
+
14
+ Do NOT import this module. Do NOT add it to __all__ in __init__.py.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import builtins
20
+ import json
21
+ import os
22
+ import runpy
23
+ import sys
24
+ import time
25
+ from typing import Any
26
+ import logging
27
+ _log = logging.getLogger(__name__)
28
+
29
+
30
def main() -> int:
    """Run the target named on the command line under a call/import tracer.

    Parses ``--target``, ``--out`` and ``--timeout-s`` from ``sys.argv`` (any
    arguments after a literal ``--`` are forwarded to the target), runs the
    target with ``runpy.run_module`` as ``__main__`` while a ``sys.settrace``
    call tracer and a ``builtins.__import__`` hook are installed, and writes
    the collected events as JSON to the ``--out`` path.

    Returns:
        0 if the target finished with exit code 0;
        2 if ``VIGIL_MAPPER_TRACE`` is not ``"1"``, or the target exited with
        a non-zero code or raised an exception;
        3 if the output file could not be written.
    """
    # ------------------------------------------------------------------
    # Safety net: require VIGIL_MAPPER_TRACE=1.
    # This prevents accidental execution in-process by parent code that
    # accidentally imports this module.
    # ------------------------------------------------------------------
    if os.environ.get("VIGIL_MAPPER_TRACE") != "1":
        print(
            "ERROR: runtime_tracer_entry must only run as a subprocess with "
            "VIGIL_MAPPER_TRACE=1 set. Refusing to execute.",
            file=sys.stderr,
        )
        return 2

    # ------------------------------------------------------------------
    # Argument parsing.
    # Everything after the first `--` belongs to the target and is split off
    # before argparse sees the remaining options.
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Subprocess entrypoint for cortex runtime tracer.",
        add_help=True,
    )
    parser.add_argument("--target", required=True, help="Dotted module name to run.")
    parser.add_argument("--out", required=True, help="Path to write JSON output.")
    parser.add_argument(
        "--timeout-s",
        type=float,
        default=30.0,
        help="Soft time budget for the target (informational only here).",
    )

    raw_args = sys.argv[1:]
    target_argv_start: list[str] = []
    if "--" in raw_args:
        sep_idx = raw_args.index("--")
        target_argv_start = raw_args[sep_idx + 1:]
        raw_args = raw_args[:sep_idx]

    args = parser.parse_args(raw_args)
    target_module: str = args.target
    out_path: str = args.out

    # ------------------------------------------------------------------
    # Local accumulators (NOT module-level globals).
    # ------------------------------------------------------------------
    events: list[dict[str, Any]] = []
    import_events: list[dict[str, Any]] = []
    t0 = time.perf_counter()

    # ------------------------------------------------------------------
    # Save the hooks that were active on entry; both are put back in the
    # finally block below.
    # ------------------------------------------------------------------
    orig_settrace = sys.gettrace()
    orig_import = builtins.__import__

    exit_code: int = 0
    exception_info: dict[str, str] | None = None

    # ------------------------------------------------------------------
    # Skip-list for trace events: frames whose filename contains any of
    # these fragments are not recorded.
    # ------------------------------------------------------------------
    _skip_fragments = (
        os.sep + "libs" + os.sep,
        "__pycache__",
        "frozen importlib",
        "importlib",
    )

    def _should_skip_filename(fname: str) -> bool:
        if not fname:
            return True
        if fname.startswith("<"):
            return True
        return any(frag in fname for frag in _skip_fragments)

    # ------------------------------------------------------------------
    # __import__ hook: records the requested module name, then delegates to
    # the original importer.
    # ------------------------------------------------------------------
    def traced_import(name: str, *import_args: Any, **import_kwargs: Any) -> Any:
        import_events.append({
            "event": "import",
            "module": name,
            "ts": time.perf_counter() - t0,
        })
        return orig_import(name, *import_args, **import_kwargs)

    own_hook_code = traced_import.__code__

    # ------------------------------------------------------------------
    # sys.settrace call tracer.
    # ------------------------------------------------------------------
    def tracefunc(frame: Any, event: str, arg: Any) -> Any:
        if event == "call":
            code = frame.f_code
            # The import hook above is tracer machinery, not target code, so
            # its own frames are not reported as call events.
            if code is not own_hook_code:
                fname = code.co_filename
                if not _should_skip_filename(fname):
                    qualname = code.co_qualname if hasattr(code, "co_qualname") else code.co_name
                    events.append({
                        "event": "call",
                        "qualname": qualname,
                        "filename": fname,
                        "lineno": frame.f_lineno,
                        "ts": time.perf_counter() - t0,
                    })
        # Only "call" events are recorded, and those are always delivered to
        # the global trace function. Returning None declines local tracing of
        # the new scope, avoiding a no-op callback for every line/return.
        return None

    # ------------------------------------------------------------------
    # Run the target module with hooks installed.
    # ------------------------------------------------------------------
    sys.settrace(tracefunc)
    builtins.__import__ = traced_import

    try:
        # Set sys.argv so the target module sees appropriate argv.
        sys.argv = [target_module] + list(target_argv_start)
        runpy.run_module(target_module, run_name="__main__", alter_sys=True)
        exit_code = 0

    except SystemExit as exc:
        # Target called sys.exit(); translate its code to an int.
        code = exc.code
        if code is None:
            exit_code = 0
        elif isinstance(code, int):
            exit_code = code
        else:
            # SystemExit with a non-int payload (e.g. a message) -> code 1.
            exit_code = 1

    except Exception as exc:  # noqa: BLE001 -- intentional broad catch for target
        exit_code = 2
        exception_info = {
            "type": type(exc).__name__,
            "message": str(exc),
        }

    finally:
        # Always restore the hooks that were active on entry, including any
        # trace function that was installed before this call.
        sys.settrace(orig_settrace)
        builtins.__import__ = orig_import

    # ------------------------------------------------------------------
    # Write JSON output to the file specified by --out.
    # ------------------------------------------------------------------
    duration_s = time.perf_counter() - t0
    output: dict[str, Any] = {
        "events": events,
        "import_events": import_events,
        "exit_code": exit_code,
        "duration_s": duration_s,
    }
    if exception_info is not None:
        output["exception"] = exception_info

    try:
        out_text = json.dumps(output, ensure_ascii=False)
        with open(out_path, "w", encoding="utf-8") as fh:
            fh.write(out_text)
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeEncodeError, e.g. a filename that cannot be
        # encoded as UTF-8.
        print("ERROR: failed to write output file %r: %s" % (out_path, exc), file=sys.stderr)
        return 3

    # 0 when the target finished with exit code 0; 2 for any other outcome
    # (non-zero SystemExit or an unhandled exception). The target's own code
    # is preserved in the JSON "exit_code" field.
    return 0 if exit_code == 0 else 2


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,71 @@
1
+ """Semantic diff for map JSON files, ignoring timestamp-like fields.
2
+
3
+ Used by rebuild worker (Phase E1) to decide whether to promote temp-dir
4
+ rebuild output to canonical location -- skip write if content unchanged
5
+ modulo ignored timestamps.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
__all__ = ["semantic_map_diff"]

_log = logging.getLogger(__name__)

# Keys removed (at every nesting level) before two maps are compared.
_IGNORED_FIELDS: frozenset[str] = frozenset({
    # Build timestamps / envelope -- never semantic content
    "built_at",
    "freshness",
    "produced_by",
    "build_duration_s",
    "generated_at",
    "map_name",
    # Build-environment git metadata (hotspot map only -- see cli_entry.py
    # `_hotspot_churn_meta`). `git_head_sha` leaks into the payload when any
    # git activity occurs between the two rebuild runs (commit, branch move,
    # rebase). `churn_source` toggles between "git_log_numstat",
    # "git_log_numstat_empty", and "skipped" depending on whether the index
    # refresh picked up a git hiccup. `since_window` is stable for a given
    # build config but belongs to the same metadata class and is therefore
    # excluded for consistency. Excluding these three fields keeps the
    # semantic-diff focused on map *content*, not build environment.
    "git_head_sha",
    "churn_source",
    "since_window",
    # Derived size of the serialized map file -- jitters whenever
    # build_duration_s changes its number of printed digits.
    # Recorded in 00_map_index.json per-map entry; not semantic payload.
    "file_bytes",
})


def semantic_map_diff(new_path: Path, old_path: Path) -> bool:
    """Return True if two map JSON files are identical once ignored fields are removed.

    Both files are read as UTF-8 and parsed as JSON; every key listed in
    ``_IGNORED_FIELDS`` is dropped at every nesting level before comparing.

    Args:
        new_path: Path of the freshly built map file.
        old_path: Path of the existing map file to compare against.

    Returns:
        True when both files parse and their stripped structures are equal.
        False when the content differs or when either file cannot be read,
        decoded or parsed -- a failure is treated as "changed".
    """
    try:
        new_data = json.loads(new_path.read_text(encoding="utf-8"))
        old_data = json.loads(old_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so
        # invalid UTF-8 is reported as "changed" rather than raising.
        _log.debug(
            "semantic_map_diff: read/parse failed (%s): new=%s old=%s",
            type(exc).__name__,
            new_path,
            old_path,
        )
        return False  # treat as changed on any read/parse error
    return _strip_ignored(new_data) == _strip_ignored(old_data)


def _strip_ignored(obj: Any) -> Any:
    """Return a copy of *obj* with ignored keys removed from every nested dict.

    Dicts and lists are rebuilt recursively; any other value is returned
    unchanged.
    """
    if isinstance(obj, dict):
        return {k: _strip_ignored(v) for k, v in obj.items() if k not in _IGNORED_FIELDS}
    if isinstance(obj, list):
        return [_strip_ignored(x) for x in obj]
    return obj
@@ -0,0 +1,109 @@
1
+ """Source adapter registry and dispatch helpers.
2
+
3
+ Public API:
4
+ ADAPTERS -- dict mapping file extension -> SourceAdapter instance.
5
+ get_adapter_for_file -- return adapter for a given Path (by extension).
6
+ supported_extensions -- tuple of all currently registered extensions.
7
+ SourceAdapter -- Protocol for type-checking adapter compliance.
8
+ RegexAdapterBase -- Base class for regex-based adapters (L2+ languages).
9
+ IR signal classes -- ImportEdge, SymbolDef, ContractCandidate,
10
+ RuntimeSignal, AuthorityWriteCandidate.
11
+
12
+ Registry population (current state — all 5 languages registered):
13
+ Python (.py), TypeScript (.ts, .tsx), JavaScript (.js, .jsx),
14
+ Go (.go), Java (.java).
15
+ Historical note: L1 shipped Python only; L2 added TS/JS; L5 added Go/Java.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from pathlib import Path
21
+
22
+ from ._base import RegexAdapterBase, SourceAdapter
23
+ from ._ir import (
24
+ AuthorityWriteCandidate,
25
+ ContractCandidate,
26
+ ImportEdge,
27
+ RuntimeSignal,
28
+ TSRuntimeSignal,
29
+ SymbolDef,
30
+ )
31
+ from .go import GoAdapter
32
+ from .java import JavaAdapter
33
+ from .javascript import JavascriptAdapter
34
+ from .python import PythonAdapter
35
+ from .typescript import TypescriptAdapter
36
+
37
+ __all__ = [
38
+ "ADAPTERS",
39
+ "get_adapter_for_file",
40
+ "supported_extensions",
41
+ "SourceAdapter",
42
+ "RegexAdapterBase",
43
+ "ImportEdge",
44
+ "SymbolDef",
45
+ "ContractCandidate",
46
+ "RuntimeSignal",
47
+ "TSRuntimeSignal",
48
+ "AuthorityWriteCandidate",
49
+ "PythonAdapter",
50
+ "TypescriptAdapter",
51
+ "JavascriptAdapter",
52
+ "GoAdapter",
53
+ "JavaAdapter",
54
+ ]
55
+
56
_log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Static adapter registry -- keyed by lowercase file extension
# ---------------------------------------------------------------------------

# Populated at import time via _register(). Extensions must be lowercase.
ADAPTERS: dict[str, SourceAdapter] = {}


def _register(adapter: SourceAdapter) -> None:
    """Register *adapter* for each extension it declares.

    Extensions are stored in lowercase. Registration is all-or-nothing: every
    extension is checked before ``ADAPTERS`` is modified, so a rejected
    adapter leaves the registry exactly as it was.

    Raises:
        ValueError: If an extension is already registered, or if *adapter*
            declares the same extension twice (compared case-insensitively).
            This prevents a silent override during development.
    """
    pending: list[str] = []
    for ext in adapter.file_extensions:
        key = ext.lower()
        if key in ADAPTERS:
            existing = ADAPTERS[key]
        elif key in pending:
            # The adapter itself lists this extension more than once.
            existing = adapter
        else:
            pending.append(key)
            continue
        raise ValueError(
            f"Duplicate adapter registration for extension {key!r}: "
            f"existing={existing.__class__.__name__!r}, "
            f"new={adapter.__class__.__name__!r}"
        )

    # Validation passed for every extension: commit in declaration order.
    for key in pending:
        ADAPTERS[key] = adapter
        _log.debug("source_adapters: registered %s for %r", adapter.__class__.__name__, key)
82
+
83
+
84
# Register the five built-in adapters. Each adapter is instantiated and
# registered in turn, in this order: Python, TypeScript, JavaScript, Go, Java.
for _adapter_cls in (
    PythonAdapter,
    TypescriptAdapter,
    JavascriptAdapter,
    GoAdapter,
    JavaAdapter,
):
    _register(_adapter_cls())
del _adapter_cls  # do not leave the loop variable in the module namespace
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Dispatch helpers
94
+ # ---------------------------------------------------------------------------
95
+
96
def get_adapter_for_file(path: Path) -> SourceAdapter | None:
    """Look up the adapter registered for the extension of *path*.

    The suffix is lowercased before the lookup, so ``Path("FOO.PY")`` and
    ``Path("foo.py")`` resolve to the same adapter.

    Returns:
        The registered adapter, or None when no adapter is registered for the
        file's extension.
    """
    extension = path.suffix.lower()
    return ADAPTERS.get(extension)
105
+
106
+
107
def supported_extensions() -> tuple[str, ...]:
    """Return every registered file extension as a sorted tuple."""
    ordered = sorted(ADAPTERS)
    return tuple(ordered)