vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,129 @@
1
+ """Shared file-count guard (anti-hang on huge repos).
2
+
3
+ Both the forensic auditor and the code-map builder do per-file work that scales
4
+ with the number of source files (forensic averages ~0.4 s/file across its gate
5
+ AST walks). On a real repo with thousands of files this turns into hours and
6
+ effectively hangs the machine. A per-file *size* guard already exists but does
7
+ nothing against thousands of *small* files — only a guard on the file COUNT can.
8
+
9
+ This module is pure stdlib (no project imports) so it is safe to import from
10
+ either package without circular-import risk. It lives in ``vigil_mapper``
11
+ because the dependency arrow is forensic -> map (forensic may import map; map
12
+ never imports forensic), so this is the one place both sides can share.
13
+
14
+ Helpers
15
+ -------
16
+ summarize_top_subdirs(rel_paths, limit)
17
+ Group relative paths by their top-level directory component and return the
18
+ ``limit`` biggest as ``[{"dir": str, "files": int}, ...]`` (descending).
19
+ Files directly under the project root are grouped under ``"."``.
20
+ build_too_many_files_meta(rel_paths, max_files, *, entry_call=...)
21
+ Build the structured ``too_many_files`` meta dict returned by both tools
22
+ when ``len(rel_paths) > max_files``.
23
+
24
+ Default ceiling
25
+ ---------------
26
+ ``DEFAULT_MAX_FILES = 800``. Forensic averages ~0.4 s/file, so ~800 files is a
27
+ ~5-minute ceiling — a sane upper bound for an interactive tool. Callers can pass
28
+ a larger ``max_files`` to force a full scan.
29
+ """
30
+ from __future__ import annotations
31
+
32
+ from collections import Counter
33
+
34
+ __all__ = [
35
+ "DEFAULT_MAX_FILES",
36
+ "summarize_top_subdirs",
37
+ "build_too_many_files_meta",
38
+ ]
39
+
40
# Default ceiling on the number of source files a single scan will process.
# Forensic averages ~0.4 s/file -> ~800 files is a ~5 min ceiling. Callers can
# pass a larger ``max_files`` to force a full scan.
DEFAULT_MAX_FILES = 800

# How many top sub-directories to report in the skip result; this is the
# default ``limit`` of ``summarize_top_subdirs``.
_TOP_SUBDIRS = 8
45
+
46
+
47
def _top_component(rel_path: str) -> str:
    """Return the first path component of a posix-ish relative path.

    Backslashes are treated as separators. Any run of leading ``"./"`` and
    ``"/"`` prefixes is removed, in whatever order they appear, before the
    first component is taken. A leading-dot directory name (e.g. ``.claude``)
    is preserved.

    Args:
        rel_path: Relative file path, ``"/"`` or ``"\\"`` separated.

    Returns:
        The first directory component, or ``"."`` when the path is empty or
        has no separator left (a file directly under the project root), so
        the caller always gets a stable bucket name.
    """
    norm = rel_path.replace("\\", "/")
    # Strip "./" and "/" prefixes in a single loop. Doing it in two separate
    # passes leaves a "./" behind for inputs such as ".//./src/a.py", which
    # would then be mis-bucketed under "." instead of "src".
    while norm.startswith(("./", "/")):
        norm = norm[2:] if norm.startswith("./") else norm[1:]
    head, sep, _tail = norm.partition("/")
    # No separator left means a root-level file (this also covers "").
    return head if sep else "."
64
+
65
+
66
def summarize_top_subdirs(
    rel_paths: list[str] | tuple[str, ...],
    limit: int = _TOP_SUBDIRS,
) -> list[dict[str, str | int]]:
    """Group *rel_paths* by top-level dir; return the *limit* biggest buckets.

    Args:
        rel_paths: Relative source-file paths (``"/"`` or ``"\\"`` separated).
        limit: Max number of buckets to return. Zero or a negative value
            yields an empty list.

    Returns:
        ``[{"dir": str, "files": int}, ...]`` sorted by ``files`` descending,
        ties broken by directory name for determinism. Files directly under
        the project root are counted in the ``"."`` bucket.
    """
    counter: Counter[str] = Counter(_top_component(p) for p in rel_paths)
    # Sort by count desc, then dir name asc (deterministic).
    ordered = sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))
    # Clamp the cap at zero: a negative slice bound counts from the end and
    # would silently drop the smallest buckets instead of limiting the result.
    return [{"dir": d, "files": n} for d, n in ordered[: max(limit, 0)]]
84
+
85
+
86
def build_too_many_files_meta(
    rel_paths: list[str] | tuple[str, ...],
    max_files: int,
    *,
    entry_call: str = "start_forensic_audit",
) -> dict:
    """Assemble the ``too_many_files`` meta dict for an over-limit scan.

    Args:
        rel_paths: The collected relative source-file paths (the over-limit
            set); its length is reported as ``file_count``.
        max_files: The ceiling that was exceeded; echoed back unchanged.
        entry_call: Name of the MCP entry the suggestion should mention, e.g.
            ``"start_forensic_audit"`` or ``"start_code_map"``.

    Returns:
        A dict with the keys ``skipped_reason``, ``file_count``,
        ``max_files``, ``top_subdirs`` and ``suggestion``. The suggestion
        names the biggest non-root sub-directory when there is one and falls
        back to a ``<subdir>`` placeholder otherwise.
    """
    buckets = summarize_top_subdirs(rel_paths)
    # Buckets arrive biggest-first, so the first one that is not the root
    # "." bucket is the largest named sub-directory.
    named = next((b["dir"] for b in buckets if b["dir"] != "."), None)
    target = "<subdir>" if named is None else named
    hint = (
        f"Scan a submodule, e.g. {entry_call}(path='<dir>/{target}'), "
        "or raise max_files to force a full scan."
    )
    return {
        "skipped_reason": "too_many_files",
        "file_count": len(rel_paths),
        "max_files": max_files,
        "top_subdirs": buckets,
        "suggestion": hint,
    }
@@ -0,0 +1,178 @@
1
+ """Neutral shared git helpers. Depends only on stdlib.
2
+
3
+ Used by map_builder (churn) and gate_checks (diff-based checks).
4
+ Never imports from gate_checks or map_builder (correct dependency direction).
5
+
6
+ Public API:
7
+ git_show(path, ref, project_dir) -- file content at git ref
8
+ git_log_numstat(project_dir, since) -- churn line counts per file
9
+ git_has_repo(project_dir) -- is inside a git work tree?
10
+ git_head_sha(project_dir) -- current HEAD SHA
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import subprocess
16
+ from pathlib import Path
17
+
18
+ _log = logging.getLogger(__name__)
19
+
20
+ __all__ = [
21
+ "git_show",
22
+ "git_log_numstat",
23
+ "git_has_repo",
24
+ "git_head_sha",
25
+ ]
26
+
27
+
28
def git_show(
    path: str,
    ref: str = "HEAD~1",
    project_dir: Path | None = None,
) -> str | None:
    """Read a file's content as of a git ref via ``git show <ref>:<path>``.

    Args:
        path: Relative file path as stored in git (e.g. "BRAIN/foo.py").
        ref: Git ref to read from. Defaults to "HEAD~1".
        project_dir: When given, ``-C project_dir`` is passed to git so the
            command does not depend on the caller's working directory.

    Returns:
        The file content decoded as UTF-8 (undecodable bytes replaced), or
        None when git exits non-zero, git cannot be started, the call times
        out, or another subprocess/OS error occurs (fail-open).
    """
    location = [] if project_dir is None else ["-C", str(project_dir)]
    command = ["git", *location, "show", f"{ref}:{path}"]

    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
            shell=False,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError) as exc:
        # Only the exception class is logged; failure is an expected outcome.
        _log.debug("git_show failed for %s@%s: %s", path, ref, type(exc).__name__)
        return None

    # A non-zero exit (e.g. the path did not exist at that ref) maps to None.
    return proc.stdout if proc.returncode == 0 else None
66
+
67
+
68
def git_log_numstat(
    project_dir: Path,
    since: str = "90.days",
) -> dict[str, int]:
    """Sum added + deleted lines per file over recent history.

    Runs ``git log --numstat --since=<since> --pretty=format:`` inside
    *project_dir* and accumulates, for every reported path, the added and
    deleted line counts across all commits in the window.

    Args:
        project_dir: Project root; passed to git via ``-C``.
        since: Value for ``--since``, e.g. ``"90.days"`` or ``"2025-01-01"``.

    Returns:
        ``{path: churn}`` where churn is the total of added and deleted lines.
        An empty dict when git exits non-zero, cannot be started, times out,
        or another subprocess/OS error occurs (fail-open).
    """
    command = [
        "git",
        "-C", str(project_dir),
        "log",
        "--numstat",
        f"--since={since}",
        "--pretty=format:",
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=30,
            shell=False,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError) as exc:
        _log.debug("git_log_numstat failed in %s: %s", project_dir, type(exc).__name__)
        return {}

    if proc.returncode != 0:
        return {}

    churn_by_path: dict[str, int] = {}
    for row in proc.stdout.splitlines():
        fields = row.split("\t")
        # A numstat row is "<added>\t<deleted>\t<path>"; skip anything else
        # (blank separator lines between commits, for instance).
        if len(fields) != 3:
            continue
        added, deleted, path = fields
        # Binary files carry "-" instead of line counts; leave them out.
        if "-" in (added, deleted):
            continue
        try:
            delta = int(added) + int(deleted)
        except ValueError:
            continue
        # NOTE(review): the path column is used verbatim as the key; confirm
        # how callers expect renamed or quoted paths from git to be handled.
        churn_by_path[path] = churn_by_path.get(path, 0) + delta
    return churn_by_path
124
+
125
+
126
def git_has_repo(project_dir: Path) -> bool:
    """Tell whether *project_dir* lies inside a git work tree.

    Asks ``git rev-parse --is-inside-work-tree`` and accepts only a zero exit
    status together with the literal answer ``true``.

    Args:
        project_dir: Directory to probe; passed to git via ``-C``.

    Returns:
        True when git confirms a work tree. False otherwise, including when
        git is not installed, the call times out, or any other subprocess/OS
        error occurs (fail-open).
    """
    command = [
        "git",
        "-C", str(project_dir),
        "rev-parse",
        "--is-inside-work-tree",
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
            shell=False,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        return False

    if proc.returncode != 0:
        return False
    return proc.stdout.strip() == "true"
150
+
151
+
152
def git_head_sha(project_dir: Path) -> str | None:
    """Look up the commit id that HEAD points to in *project_dir*.

    Args:
        project_dir: Directory inside the repository; passed to git via ``-C``.

    Returns:
        The whitespace-stripped output of ``git rev-parse HEAD``, or None
        when that output is empty, git exits non-zero, git is unavailable,
        the call times out, or any other subprocess/OS error occurs
        (fail-open).
    """
    command = [
        "git",
        "-C", str(project_dir),
        "rev-parse",
        "HEAD",
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
            shell=False,
        )
    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        return None

    if proc.returncode != 0:
        return None
    sha = proc.stdout.strip()
    # An empty answer is treated the same as a failure.
    return sha if sha else None