vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,53 @@
1
+ """Fingerprint utilities for the map builder subsystem.
2
+
3
+ Provides stable, deterministic identifiers for conflict entries and schema hashes.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import hashlib
8
+ import json
9
+ import logging
10
+
11
+ __all__ = [
12
+ "make_conflict_id",
13
+ "map_schema_hash",
14
+ ]
15
+
16
+ _log = logging.getLogger(__name__)
17
+
18
+
19
def make_conflict_id(domain: str, subject: str, sources: list[dict]) -> str:
    """Compute a stable conflict identifier from domain, subject and sources.

    Sources are ordered by ``(map, claim)`` and then by their canonical JSON
    form before hashing, so insertion order never affects the result -- not
    even when two sources share the same ``map`` and ``claim`` values.

    Args:
        domain: Domain the conflict belongs to.
        subject: Subject of the conflict.
        sources: Source dicts; each must be JSON-serializable.

    Returns:
        ``"conf_"`` followed by the first 12 hex digits of the SHA-256 digest
        of the canonical JSON payload.
    """

    def _source_sort_key(source: dict) -> tuple:
        # The serialized form only breaks ties between sources whose
        # (map, claim) pair is identical; without it ``sorted`` (which is
        # stable) would keep their insertion order and leak it into the hash.
        return (
            source.get("map", ""),
            source.get("claim", ""),
            json.dumps(source, sort_keys=True, separators=(",", ":"), ensure_ascii=False),
        )

    canonical = json.dumps(
        {
            "domain": domain,
            "subject": subject,
            "sources": sorted(sources, key=_source_sort_key),
        },
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    conflict_id = "conf_" + digest[:12]
    _log.debug("make_conflict_id: domain=%s subject=%s -> %s", domain, subject, conflict_id)
    return conflict_id
41
+
42
+
43
def map_schema_hash(entries: list[dict]) -> str:
    """Hash the union of field names found across *entries*.

    Only the keys take part in the hash, never the values, so the result
    changes when fields are added or removed between builds and stays the
    same when only values differ.

    Returns:
        The first 16 hex digits of the SHA-256 digest of the sorted,
        JSON-encoded field-name list.
    """
    field_names: set = set()
    for entry in entries:
        field_names.update(entry.keys())
    ordered_fields = sorted(field_names)

    payload = json.dumps(ordered_fields).encode("utf-8")
    schema_hash = hashlib.sha256(payload).hexdigest()[:16]
    _log.debug("map_schema_hash: %d entries, %d distinct fields -> %s", len(entries), len(ordered_fields), schema_hash)
    return schema_hash
@@ -0,0 +1,539 @@
1
+ """Hotspot map builder -- Map 6.
2
+
3
+ Aggregates multi-dimensional risk factors from all available maps into a
4
+ ranked list of file-level hotspot entries.
5
+
6
+ Generic: operates on any RepoMaps, does not assume Vigil project layout.
7
+ Sanctioned-asset patterns are passed by the caller (resolved from seed at
8
+ the CLI entry layer in Phase 7).
9
+
10
+ Public API:
11
+ build_hotspot_map(repo_maps, sanctioned_patterns=(), churn_data=None) -> list[HotspotEntry]
12
+ compute_hotspot_churn_metadata(project_dir, since_window="90.days") -> tuple[dict, dict]
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import fnmatch
17
+ import json
18
+ import logging
19
+ import math
20
+ from collections.abc import Sequence
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+
24
+ from .map_common import HOTSPOT_WEIGHTS, hotspot_mode_for_score
25
+ from .map_models import AuthorityDomain, DataContractEntry, RepoMaps, StructuralEntry
26
+ from .map_models_ext import HotspotEntry
27
+
28
+ __all__ = ["build_hotspot_map", "compute_hotspot_churn_metadata"]
29
+
30
+ _log = logging.getLogger(__name__)
31
+
32
+ _SOURCE = "automated_scoring"
33
+ _CONFIDENCE = 0.88
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Helpers
38
+ # ---------------------------------------------------------------------------
39
+
40
def _utc_now() -> str:
    """Return the current UTC time in ISO-8601 form with a ``Z`` suffix."""
    stamp = datetime.now(tz=timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
42
+
43
+
44
def _is_sanctioned(file: str, patterns: Sequence[str]) -> bool:
    """Tell whether *file* matches at least one fnmatch pattern in *patterns*."""
    return any(fnmatch.fnmatch(file, pattern) for pattern in patterns)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Per-component scoring functions
54
+ # ---------------------------------------------------------------------------
55
+
56
def _structural_risk(file: str, structural_map: tuple) -> tuple[int, list[str]]:
    """Score *file* from the tags of its structural-map entry.

    Per-tag weights come from ``HOTSPOT_WEIGHTS["structural_tags"]``; the
    literal table below is only the fallback used when that key is missing.
    Only the first ``StructuralEntry`` whose ``file`` equals *file* is
    scored, and the total is capped at
    ``HOTSPOT_WEIGHTS["structural_risk_max"]`` (20 when that key is missing).

    Returns:
        ``(capped_score, reasons)`` with one reason string per tag that
        carried a non-zero weight.
    """
    fallback_weights = {
        "large_file": 10,
        "high_fan_in": 8,
        "high_fan_out": 3,
        "cycle_member": 5,
        "unparseable": 0,
    }
    weights_by_tag: dict = HOTSPOT_WEIGHTS.get("structural_tags", fallback_weights)

    matched = next(
        (
            candidate
            for candidate in structural_map
            if isinstance(candidate, StructuralEntry) and candidate.file == file
        ),
        None,
    )

    total = 0
    notes: list[str] = []
    if matched is not None:
        for tag in matched.tags:
            weight = weights_by_tag.get(tag, 0)
            if not weight:
                continue
            total += weight
            notes.append("structural_tag:%s(+%d)" % (tag, weight))

    return min(total, HOTSPOT_WEIGHTS.get("structural_risk_max", 20)), notes
82
+
83
+
84
def _runtime_risk(file: str, runtime_map: tuple) -> tuple[int, list[str]]:
    """Score *file* from the tags of every runtime node defined in it.

    Per-tag weights come from ``HOTSPOT_WEIGHTS["runtime_tags"]``; the
    literal table below is only the fallback used when that key is missing.
    Unlike the structural score, every node whose ``defined_in`` equals
    *file* contributes. The total is capped at
    ``HOTSPOT_WEIGHTS["runtime_risk_max"]`` (20 when that key is missing).

    Returns:
        ``(capped_score, reasons)`` with one reason string per weighted tag.
    """
    fallback_weights = {
        "import_time_side_effects": 8,
        "background_task": 5,
        "decorator_registry": 3,
    }
    weights_by_tag: dict = HOTSPOT_WEIGHTS.get("runtime_tags", fallback_weights)

    total = 0
    notes: list[str] = []
    for node in runtime_map:
        if getattr(node, "defined_in", "") == file:
            for tag in getattr(node, "tags", ()):
                weight = weights_by_tag.get(tag, 0)
                if weight:
                    total += weight
                    notes.append("runtime_tag:%s(+%d)" % (tag, weight))

    return min(total, HOTSPOT_WEIGHTS.get("runtime_risk_max", 20)), notes
109
+
110
+
111
def _decode_conflict_source(src_raw: object) -> dict:
    """Return a conflict source as a dict, or ``{}`` when it is not one.

    A source may be a JSON string or an already-decoded object. Anything
    that fails to decode, or decodes to something other than a dict (a list,
    a bare string, a number), yields ``{}`` so callers can use ``.get``.
    """
    if isinstance(src_raw, str):
        try:
            src_raw = json.loads(src_raw)
        except (ValueError, RecursionError):
            # Best-effort: a malformed source must not abort scoring.
            return {}
    return src_raw if isinstance(src_raw, dict) else {}


def _authority_risk(
    file: str,
    authority_map: tuple,
    conflict_map: tuple = (),
) -> tuple[int, list[str]]:
    """Score based on authority ownership and open conflicts. Range 0-20.

    Scoring tiers:
      - canonical_owner + any open conflict in that domain -> authority_risk_with_conflict (20)
      - canonical_owner + no open conflicts                -> authority_risk_base (5)
      - file is a source in any open conflict (writer role) -> authority_writer_in_conflict (+10)

    The canonical-owner tier is evaluated first and *sets* the score; only
    the first domain owned by *file* is considered. The writer tier is
    additive and is applied at most once. The sum is capped at
    ``HOTSPOT_WEIGHTS["authority_risk_max"]``. The numbers in parentheses
    are the fallbacks used when a key is missing from ``HOTSPOT_WEIGHTS``.

    Args:
        file: Path of the file being scored.
        authority_map: Authority entries; non-``AuthorityDomain`` items are skipped.
        conflict_map: Conflict entries. When empty, a domain's
            ``last_drift_events`` is used as the conflict signal instead.

    Returns:
        ``(capped_score, reasons)``.
    """
    w_base: int = HOTSPOT_WEIGHTS.get("authority_risk_base", 5)
    w_conflict: int = HOTSPOT_WEIGHTS.get("authority_risk_with_conflict", 20)
    w_writer: int = HOTSPOT_WEIGHTS.get("authority_writer_in_conflict", 10)

    score = 0
    reasons: list[str] = []

    # --- Canonical-owner tier ---
    for domain in authority_map:
        if not isinstance(domain, AuthorityDomain) or domain.canonical_owner != file:
            continue
        if _domain_has_open_conflict(domain.authority_domain, conflict_map):
            score = w_conflict
            reasons.append(
                "canonical_owner_of:%s_with_open_conflict(+%d)" % (domain.authority_domain, w_conflict)
            )
        elif not conflict_map and domain.last_drift_events:
            # Legacy fallback: without a conflict map, drift events stand in
            # for open conflicts.
            score = w_conflict
            reasons.append(
                "canonical_owner_of:%s_with_drift_events(+%d)" % (domain.authority_domain, w_conflict)
            )
        else:
            score = w_base
            reasons.append(
                "canonical_owner_of:%s_clean(+%d)" % (domain.authority_domain, w_base)
            )
        break  # Only count the first matching domain; cap applied below.

    # --- Writer-in-conflict tier (additive, counted once) ---
    is_conflict_writer = any(
        _decode_conflict_source(src_raw).get("file") == file
        for conflict in conflict_map
        if getattr(conflict, "conflict_status", "") == "open"
        for src_raw in getattr(conflict, "sources", ())
    )
    if is_conflict_writer:
        score += w_writer
        reasons.append("writer_in_open_conflict(+%d)" % w_writer)

    capped = min(score, HOTSPOT_WEIGHTS.get("authority_risk_max", 20))
    _log.debug("_authority_risk: file=%s raw=%d capped=%d", file, score, capped)
    return capped, reasons
188
+
189
+
190
def _domain_has_open_conflict(domain: str, conflict_map: tuple) -> bool:
    """Tell whether *conflict_map* holds an open conflict that targets *domain*.

    A conflict targets the domain when its ``domain`` attribute equals
    *domain* or its ``subject`` starts with ``"<domain>::"``. Conflicts
    whose ``conflict_status`` is not ``"open"`` are ignored.
    """
    subject_prefix = domain + "::"
    return any(
        getattr(item, "conflict_status", "") == "open"
        and (
            getattr(item, "domain", "") == domain
            or getattr(item, "subject", "").startswith(subject_prefix)
        )
        for item in conflict_map
    )
202
+
203
+
204
def _duplication_score(
    file: str,
    contract_map: tuple,
) -> tuple[int, list[str]]:
    """Score +10 when *file* reads or writes a contract that has drift flags.

    Only the first matching ``DataContractEntry`` is counted. The result is
    capped at ``HOTSPOT_WEIGHTS["duplication_score_max"]`` (20 when that key
    is missing).

    Returns:
        ``(capped_score, reasons)``; ``reasons`` names the matching contract
        entity, or is empty when nothing matched.
    """
    drifting = next(
        (
            candidate
            for candidate in contract_map
            if isinstance(candidate, DataContractEntry)
            and candidate.drift_flags
            and (file in candidate.writers or file in candidate.readers)
        ),
        None,
    )

    if drifting is None:
        raw_score = 0
        notes: list[str] = []
    else:
        raw_score = 10
        notes = ["drift_flags_in_contract:%s(+10)" % drifting.entity]

    return min(raw_score, HOTSPOT_WEIGHTS.get("duplication_score_max", 20)), notes
225
+
226
+
227
def _test_gap(file: str, structural_map: tuple) -> tuple[int, list[str]]:
    """Score +10 when the structural map holds no test file for *file*.

    For ``foo.py`` a test file is one whose basename, with any ``.py``
    suffix removed, is exactly ``test_foo`` or starts with ``test_foo_``
    (for example ``test_foo_edge_cases.py``). Matching on that boundary
    keeps ``test_foobar.py`` from counting as coverage for ``foo.py``.
    Non-``.py`` names keep their extension on both sides of the comparison.

    Returns:
        ``(0, [])`` when a test file exists; otherwise ``(score, [reason])``
        where ``score`` is 10 capped by ``HOTSPOT_WEIGHTS["test_gap_max"]``.
    """
    import posixpath

    def _stem(path: str) -> str:
        # Normalise Windows separators first so basename works on any path.
        base = posixpath.basename(path.replace("\\", "/"))
        return base[: -len(".py")] if base.endswith(".py") else base

    stem = _stem(file)
    expected_test = "test_" + stem
    expected_prefix = expected_test + "_"

    for entry in structural_map:
        if not isinstance(entry, StructuralEntry):
            continue
        entry_stem = _stem(entry.file)
        if entry_stem == expected_test or entry_stem.startswith(expected_prefix):
            return 0, []

    score = min(10, HOTSPOT_WEIGHTS.get("test_gap_max", 20))
    return score, ["no_test_file_for:%s(+%d)" % (stem, score)]
244
+
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # Evidence helper and public API
248
+ # ---------------------------------------------------------------------------
249
+
250
def _populate_hotspot_evidence(
    file: str,
    repo_maps: RepoMaps,
    all_reasons: list[str],
) -> tuple[str, ...]:
    """Build evidence items linking a hotspot to its contributing sources.

    Evidence is collected in this order and then truncated to 8 items:
      1. The first open conflict involving *file*, either as the conflict
         subject or as the ``file`` of one of its sources
         (kind="map_entry", map="conflict").
      2. Up to 5 importers of *file* taken from its structural-map entry,
         in alphabetical order (kind="source_location", map="structural").
      3. Up to 3 runtime nodes defined in *file* that carry an
         ``import_time_side_effects`` or ``background_task`` tag
         (kind="source_location", map="runtime").

    Args:
        file: Path of the hotspot file.
        repo_maps: Container providing the conflict, structural and runtime maps.
        all_reasons: Currently unused; kept so the call signature stays stable.

    Returns:
        Tuple of JSON-serialized EvidenceItem strings.
    """
    from .map_models_findings import EvidenceItem

    evidence_items: list[EvidenceItem] = []

    # --- 1. First open conflict that involves this file ---
    for conflict in repo_maps.conflict:
        if getattr(conflict, "conflict_status", "") != "open":
            continue
        conflict_id = getattr(conflict, "conflict_id", "")
        if not conflict_id:
            # Evidence needs an id to point at; skip anonymous conflicts.
            continue

        involved = file == getattr(conflict, "subject", "")
        if not involved:
            for src_raw in getattr(conflict, "sources", ()):
                if isinstance(src_raw, str):
                    try:
                        src = json.loads(src_raw)
                    except (ValueError, RecursionError):
                        # Best-effort: ignore a malformed source and keep looking.
                        continue
                else:
                    src = src_raw
                if isinstance(src, dict) and src.get("file") == file:
                    involved = True
                    break

        if involved:
            evidence_items.append(EvidenceItem(
                kind="map_entry",
                map="conflict",
                entry_id=conflict_id,
            ))
            break  # Only one conflict item is recorded per hotspot.

    # --- 2. Importers of this file (first matching structural entry only) ---
    importers: list[str] = []
    for entry in repo_maps.structural:
        if isinstance(entry, StructuralEntry) and entry.file == file:
            importers = sorted(entry.imports_in)
            break

    for importer in importers[:5]:
        evidence_items.append(EvidenceItem(
            kind="source_location",
            file=importer,
            map="structural",
        ))

    # --- 3. Runtime nodes with significant tags ---
    # NOTE(review): every qualifying node yields an identical item
    # (same file, same map), so up to three duplicates can be emitted.
    # Kept as-is to preserve the existing output -- confirm whether
    # downstream consumers rely on the repetition before deduplicating.
    runtime_sources_added = 0
    for node in repo_maps.runtime:
        if runtime_sources_added >= 3:
            break
        if getattr(node, "defined_in", "") != file:
            continue
        tags = getattr(node, "tags", ())
        if any(tag in ("import_time_side_effects", "background_task") for tag in tags):
            evidence_items.append(EvidenceItem(
                kind="source_location",
                file=file,
                map="runtime",
            ))
            runtime_sources_added += 1

    # Serialize, keeping at most 8 items in collection order.
    return tuple(
        json.dumps(item.to_dict(), sort_keys=True)
        for item in evidence_items[:8]
    )
343
+
344
+
345
def build_hotspot_map(
    repo_maps: RepoMaps,
    sanctioned_patterns: Sequence[str] = (),
    *,
    churn_data: dict[str, int] | None = None,
) -> list[HotspotEntry]:
    """Build a hotspot map ranking files by multi-dimensional risk score.

    Scoring formula (per spec Map 6, plan sec.19):
      score = structural_risk[0-20] + runtime_risk[0-20]
            + authority_risk[0-20] + duplication_score[0-20]
            + failure_frequency[0] + test_gap[0-20] + churn[0-20]
            - confidence_penalty[5]
      plus a negative ``test_file_penalty`` for ``test_*`` / ``*_test.py``
      files, clamped to [0, 130].

    Mode assignment:
      0-30   -> safe_refactor
      31-60  -> contained_refactor
      61-90  -> forensic_first
      91+    -> do_not_touch_without_runtime_trace

    Sanctioned files (matching any sanctioned_patterns fnmatch) are excluded.

    Args:
        repo_maps: Container with all available maps.
        sanctioned_patterns: Glob/fnmatch patterns for files to exclude.
        churn_data: Optional per-file churn line counts (relative paths ->
            total added+deleted lines). If None (default), the churn
            component is 0 for all files (backward-compatible behaviour).
            Negative counts are treated as 0.
            Compute via :func:`compute_hotspot_churn_metadata`.

    Returns:
        List of HotspotEntry sorted by (-score, target) for deterministic
        tie-breaking. An empty list is returned (and an info message logged)
        when the structural map is empty.
    """
    import posixpath

    if not repo_maps.structural:
        _log.info("hotspot: skipping -- structural map is empty (non-Python project or empty source tree)")
        return []

    churn: dict[str, int] = churn_data or {}

    _log.info(
        "build_hotspot_map: starting -- structural=%d runtime=%d "
        "contract=%d authority=%d sanctioned_patterns=%d churn_files=%d",
        len(repo_maps.structural),
        len(repo_maps.runtime),
        len(repo_maps.data_contract),
        len(repo_maps.authority),
        len(sanctioned_patterns),
        len(churn),
    )

    freshness = _utc_now()

    # Loop invariants, resolved once instead of per file.
    churn_cap_raw = HOTSPOT_WEIGHTS.get("churn_cap", 20)
    churn_cap = churn_cap_raw if isinstance(churn_cap_raw, int) else 20
    # Not yet implemented (no historical data).
    failure_frequency = 0
    # Default confidence penalty.
    confidence_penalty = 5

    # Collect unique file targets from structural map (primary source of files).
    candidate_files: list[str] = []
    seen_files: set[str] = set()
    for entry in repo_maps.structural:
        if isinstance(entry, StructuralEntry) and entry.file not in seen_files:
            candidate_files.append(entry.file)
            seen_files.add(entry.file)

    entries: list[HotspotEntry] = []

    for file in candidate_files:
        # Exclude sanctioned assets.
        if _is_sanctioned(file, sanctioned_patterns):
            _log.debug("build_hotspot_map: skipping sanctioned file %s", file)
            continue

        all_reasons: list[str] = []

        sr, sr_reasons = _structural_risk(file, repo_maps.structural)
        rr, rr_reasons = _runtime_risk(file, repo_maps.runtime)
        ar, ar_reasons = _authority_risk(file, repo_maps.authority, repo_maps.conflict)
        ds, ds_reasons = _duplication_score(file, repo_maps.data_contract)
        tg, tg_reasons = _test_gap(file, repo_maps.structural)

        # Churn component: log-scale dampened, capped at churn_cap (default 20).
        # Clamp at 0 first: math.log1p raises ValueError for inputs <= -1.
        churn_raw = churn.get(file, 0)
        churn_component = min(
            churn_cap,
            int(math.log1p(max(churn_raw, 0)) * 4),
        )

        raw_score = sr + rr + ar + ds + failure_frequency + tg + churn_component - confidence_penalty

        # Test-file penalty: test_*.py or *_test.py get a score reduction so
        # that production files with comparable structural risk rank higher.
        basename = posixpath.basename(file.replace("\\", "/"))
        penalty: int = 0
        if basename.startswith("test_") or basename.endswith("_test.py"):
            # -abs() keeps the penalty negative whichever sign is configured.
            penalty = -abs(HOTSPOT_WEIGHTS.get("test_file_penalty", -10))
            all_reasons.append("test_file_penalty(%d)" % penalty)
            _log.debug("build_hotspot_map: test file penalty applied to %s (%d)", file, penalty)

        raw_score += penalty
        score = max(0, min(130, raw_score))

        all_reasons.extend(sr_reasons)
        all_reasons.extend(rr_reasons)
        all_reasons.extend(ar_reasons)
        all_reasons.extend(ds_reasons)
        all_reasons.extend(tg_reasons)
        if churn_raw > 0:
            all_reasons.append("churn_%d(+%d)" % (churn_raw, churn_component))

        mode = hotspot_mode_for_score(score)

        # Populate evidence from contributing sources
        evidence = _populate_hotspot_evidence(file, repo_maps, all_reasons)

        entries.append(HotspotEntry(
            target=file,
            hotspot_score=score,
            reasons=tuple(all_reasons),
            recommended_mode=mode,
            source=_SOURCE,
            evidence=evidence,
            confidence=_CONFIDENCE,
            freshness=freshness,
            status="observed",
        ))

    # Sort: highest score first, then alphabetically by target for tie-break.
    entries.sort(key=lambda e: (-e.hotspot_score, e.target))

    _log.info(
        "build_hotspot_map: done -- %d entries, top score=%d",
        len(entries),
        entries[0].hotspot_score if entries else 0,
    )
    return entries
489
+
490
+
491
def compute_hotspot_churn_metadata(
    project_dir: Path,
    since_window: str = "90.days",
) -> tuple[dict[str, int], dict]:
    """Collect per-file churn plus the metadata describing how it was obtained.

    Nothing is cached at module level: each call asks the ``_git_utils``
    helpers again. When ``git_has_repo(project_dir)`` is false, no git data
    is requested and the churn dict is empty.

    Args:
        project_dir: Absolute path to the project root.
        since_window: Window forwarded as ``since`` to ``git_log_numstat``,
            e.g. ``"90.days"``, ``"6.months"`` or ``"2025-01-01"``.

    Returns:
        ``(churn_data, metadata)`` where:
          - ``churn_data`` is the ``{relative_path: total_churn_lines}`` dict
            to pass as ``churn_data`` to :func:`build_hotspot_map`.
          - ``metadata`` holds ``churn_source`` (``"skipped"``,
            ``"git_log_numstat"`` or ``"git_log_numstat_empty"``),
            ``git_head_sha`` and ``since_window`` (``None`` when skipped).
    """
    from ._git_utils import git_head_sha, git_has_repo, git_log_numstat

    if git_has_repo(project_dir):
        churn_data: dict[str, int] = git_log_numstat(project_dir, since=since_window)
        churn_source = "git_log_numstat" if churn_data else "git_log_numstat_empty"
        git_head = git_head_sha(project_dir)
        # Both git-backed sources report the window that was queried.
        reported_window = since_window
    else:
        churn_data = {}
        churn_source = "skipped"
        git_head = None
        reported_window = None

    metadata: dict = {
        "churn_source": churn_source,
        "git_head_sha": git_head,
        "since_window": reported_window,
    }

    _log.info(
        "compute_hotspot_churn_metadata: source=%s files=%d git_head=%s",
        churn_source,
        len(churn_data),
        git_head,
    )
    return churn_data, metadata