vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,446 @@
1
+ """Data contract map builder -- scans target project for entity types.
2
+
3
+ Detects: @dataclass, NamedTuple, TypedDict, pydantic.BaseModel classes.
4
+ Builds DataContractEntry per entity with shape, writers, readers, drift flags.
5
+ Generic design: operates on any target project_dir via iter_py_files.
6
+ No exec/eval/compile/importlib.import_module of scanned files. AST only.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import ast
11
+ import json
12
+ import logging
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+ from typing import Any, Sequence
16
+
17
+ from .map_common import iter_py_files, iter_source_files
18
+ from .map_errors import MapBuilderError
19
+ from .map_models import DataContractEntry
20
+ from .map_storage import seeds_dir
21
+ from ._ast_helpers_minimal import parse_python_source_or_emit_finding
22
+
23
+ __all__ = ["build_data_contract_map"]
24
+
25
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# Provenance tag and confidence score stamped on every statically scanned entry.
_SOURCE = "static_scan"
_CONFIDENCE = 0.85

# Decorator and base-class spellings (bare and module-qualified) that mark a
# class as a data-contract entity. Matching is by name only.
_DATACLASS_DECORATORS = frozenset(("dataclass", "dataclasses.dataclass"))
_NAMEDTUPLE_BASES = frozenset(("NamedTuple", "typing.NamedTuple"))
_TYPEDDICT_BASES = frozenset(("TypedDict", "typing.TypedDict"))
_PYDANTIC_BASES = frozenset(("BaseModel", "pydantic.BaseModel"))

# Method names treated as serializers when collecting emitted dict keys.
_SERIALIZER_METHODS = frozenset(("to_dict", "to_json", "dict", "model_dump"))
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # AST helpers
39
+ # ---------------------------------------------------------------------------
40
+
41
def _node_name(node: ast.expr) -> str:
    """Return the dotted name an expression refers to, or "" if it has none.

    ``Name`` yields its identifier, ``Attribute`` yields ``<value>.<attr>``
    (the value part resolved recursively), and ``Call`` yields the name of
    its callee, so a decorator written ``@dataclass(frozen=True)`` resolves
    to ``dataclass``. Every other node type yields the empty string.
    """
    # Peel off call wrappers first; only the callee carries a name.
    while isinstance(node, ast.Call):
        node = node.func
    if isinstance(node, ast.Attribute):
        return ".".join((_node_name(node.value), node.attr))
    return node.id if isinstance(node, ast.Name) else ""
49
+
50
+
51
def _is_entity(cls: ast.ClassDef) -> bool:
    """Tell whether a class definition is a recognised data-contract entity.

    A class qualifies when one of its decorators resolves to a dataclass
    spelling, or when one of its bases resolves to a NamedTuple, TypedDict or
    pydantic BaseModel spelling. Matching is purely by name; nothing is
    imported or resolved.
    """
    for decorator in cls.decorator_list:
        if _node_name(decorator) in _DATACLASS_DECORATORS:
            return True
    entity_bases = _NAMEDTUPLE_BASES | _TYPEDDICT_BASES | _PYDANTIC_BASES
    return any(_node_name(base) in entity_bases for base in cls.bases)
56
+
57
+
58
def _entity_kind(cls: ast.ClassDef) -> str:
    """Classify an entity class by the mechanism that makes it an entity.

    Precedence is: dataclass decorator, then NamedTuple base, then TypedDict
    base. Anything else is reported as ``"pydantic"``, so callers are expected
    to have confirmed the class is an entity beforehand.
    """
    decorator_names = [_node_name(decorator) for decorator in cls.decorator_list]
    if not _DATACLASS_DECORATORS.isdisjoint(decorator_names):
        return "dataclass"

    base_names = frozenset(_node_name(base) for base in cls.bases)
    for kind, markers in (
        ("namedtuple", _NAMEDTUPLE_BASES),
        ("typeddict", _TYPEDDICT_BASES),
    ):
        if base_names & markers:
            return kind
    return "pydantic"
67
+
68
+
69
def _extract_shape(cls: ast.ClassDef) -> dict[str, str]:
    """Map each annotated class-level field to its annotation's source text.

    Only direct children of the class body are inspected, so annotated locals
    inside methods are never reported as fields. Annotated assignments whose
    target is not a plain name are ignored. If an annotation cannot be
    unparsed, the field is recorded with the placeholder ``"<unknown>"``.
    When a field is annotated more than once, the last annotation wins.
    """
    fields: dict[str, str] = {}
    for member in cls.body:
        if not isinstance(member, ast.AnnAssign):
            continue
        target = member.target
        if not isinstance(target, ast.Name):
            continue
        try:
            rendered = ast.unparse(member.annotation)
        except Exception:
            rendered = "<unknown>"
        fields[target.id] = rendered
    return fields
84
+
85
+
86
def _extract_serializer_shapes(cls: ast.ClassDef) -> dict[str, list[str]]:
    """Collect the string dict keys written by each serializer method.

    For every synchronous method in the class body whose name is listed in
    ``_SERIALIZER_METHODS``, gather the string-literal keys of every dict
    literal found anywhere inside that method (nested dicts included, in
    ``ast.walk`` order, duplicates kept). Non-string and non-literal keys are
    skipped.

    Returns:
        Mapping of method name to its list of literal keys. The list may be
        empty when the method contains no dict literal with string keys.
    """
    shapes: dict[str, list[str]] = {}
    for member in cls.body:
        if isinstance(member, ast.FunctionDef) and member.name in _SERIALIZER_METHODS:
            literal_keys: list[str] = []
            for inner in ast.walk(member):
                if not isinstance(inner, ast.Dict):
                    continue
                for key in inner.keys:
                    # ``**spread`` entries appear as None keys and are skipped here.
                    if isinstance(key, ast.Constant) and isinstance(key.value, str):
                        literal_keys.append(key.value)
            shapes[member.name] = literal_keys
    return shapes
100
+
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # Drift detection
104
+ # ---------------------------------------------------------------------------
105
+
106
def _drift_flags(
    canonical_shape: dict[str, str],
    canonical_path: str,
    variants: list[dict],
    serializer_shapes: dict[str, list[str]],
) -> list[str]:
    """Describe how variants and serializers deviate from the canonical shape.

    Args:
        canonical_shape: Field name -> annotation text of the canonical
            definition.
        canonical_path: Path of the canonical definition; variants carrying
            this same path are skipped.
        variants: Dicts with a ``"path"`` and a ``"shape"`` entry (both
            optional; they default to ``""`` and ``{}``).
        serializer_shapes: Serializer method name -> list of emitted keys.
            Methods with no keys are ignored.

    Returns:
        Flag strings in a deterministic order. For each variant, in input
        order: ``representational:extra_fields``, then
        ``representational:missing_fields``, then one
        ``semantic:annotation_diff`` per differing shared field. After all
        variants, for each serializer in mapping order:
        ``serialization:<method>:extra_keys`` then ``missing_keys``. Field and
        key names inside and across flags are sorted alphabetically.
    """
    flags: list[str] = []
    cfields = set(canonical_shape)

    for v in variants:
        vpath = v.get("path", "")
        if vpath == canonical_path:
            continue
        vshape = v.get("shape", {})
        vfields = set(vshape)
        added = vfields - cfields
        removed = cfields - vfields
        if added:
            flags.append("representational:extra_fields:%s:%s" % (vpath, ",".join(sorted(added))))
        if removed:
            flags.append("representational:missing_fields:%s:%s" % (vpath, ",".join(sorted(removed))))
        # Sort the shared fields: iterating the raw set intersection would make
        # the flag order depend on string hashing, which varies between runs.
        for field in sorted(cfields & vfields):
            if canonical_shape[field] != vshape[field]:
                flags.append("semantic:annotation_diff:%s:%s" % (vpath, field))

    for method, keys in serializer_shapes.items():
        if not keys:
            continue
        kset = set(keys)
        extra = kset - cfields
        missing = cfields - kset
        if extra:
            flags.append("serialization:%s:extra_keys:%s" % (method, ",".join(sorted(extra))))
        if missing:
            flags.append("serialization:%s:missing_keys:%s" % (method, ",".join(sorted(missing))))

    return flags
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Cross-module scan
146
+ # ---------------------------------------------------------------------------
147
+
148
def _collect_writers_readers(
    py_files: list[Path],
    entity_names: frozenset[str],
    rel_base: Path,
    *,
    syntax_error_sink=None,
) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
    """Find, per entity, the files that construct it and the files that import it.

    A file is a *writer* of an entity when it contains a call whose callee's
    last dotted component is the entity name (``User(...)``,
    ``models.User(...)``). A file is a *reader* when an import statement names
    the entity. Imports are matched on the imported name, not the local
    alias, so ``from models import User as U`` makes the file a reader of
    ``User``, and a bare call through that alias (``U(...)``) makes it a
    writer. All matching is by name only; nothing is resolved or imported.

    Args:
        py_files: Python files to scan.
        entity_names: Entity class names to look for.
        rel_base: Directory that reported paths are made relative to; a file
            outside it is reported by its full POSIX path.
        syntax_error_sink: Optional callable forwarded to
            ``parse_python_source_or_emit_finding`` as ``emit_finding``.

    Returns:
        ``(writers, readers)``: two dicts keyed by every entity name, each
        holding file paths in scan order without duplicates.

    Files that cannot be read are logged at WARNING and skipped. Files for
    which the parse helper returns ``None`` are skipped as well.
    """
    writers: dict[str, list[str]] = {n: [] for n in entity_names}
    readers: dict[str, list[str]] = {n: [] for n in entity_names}

    for py_file in py_files:
        try:
            source = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.warning("_collect_writers_readers: cannot read %s: %s", py_file, exc)
            continue

        # One relative path serves both the parse finding and the result lists.
        try:
            rel_path = py_file.relative_to(rel_base).as_posix()
        except ValueError:
            rel_path = py_file.as_posix()

        # B4 (2026-04-23): replaces silent `except SyntaxError: continue` --
        # the helper reports broken files through the supplied sink (if any).
        tree = parse_python_source_or_emit_finding(
            source,
            rel_path=rel_path,
            emit_finding=syntax_error_sink,
            emitting_gate="data_contract_builder.writers_readers",
            filename=str(py_file),
        )
        if tree is None:
            continue

        imported: set[str] = set()
        constructed: set[str] = set()
        local_aliases: dict[str, str] = {}  # local alias -> imported entity name
        calls: list[ast.Call] = []

        # Single walk: record entity imports and remember every call site.
        for node in ast.walk(tree):
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                for alias in node.names:
                    if alias.name in entity_names:
                        imported.add(alias.name)
                        if alias.asname:
                            local_aliases[alias.asname] = alias.name
            elif isinstance(node, ast.Call):
                calls.append(node)

        # Calls are resolved after the walk so every alias is known, wherever
        # the import statement sits in the file.
        for call in calls:
            callee = _node_name(call.func).rsplit(".", 1)[-1]
            if isinstance(call.func, ast.Name):
                callee = local_aliases.get(callee, callee)
            if callee in entity_names:
                constructed.add(callee)

        # Membership is checked once per entity per file rather than once per
        # call node; the check also guards against a path listed twice.
        for name in constructed:
            if rel_path not in writers[name]:
                writers[name].append(rel_path)
        for name in imported:
            if rel_path not in readers[name]:
                readers[name].append(rel_path)

    return writers, readers
209
+
210
+
211
+ # ---------------------------------------------------------------------------
212
+ # Priorities
213
+ # ---------------------------------------------------------------------------
214
+
215
def _load_priorities(project_dir: Path) -> frozenset[str]:
    """Load the names of priority entities from the project's seeds directory.

    Reads ``data_contract_priorities.json`` from ``seeds_dir(project_dir)``
    and returns the entries of its ``"priority_entities"`` list, each
    converted to ``str``.

    This is best-effort: a missing file, an unreadable or undecodable file,
    malformed JSON, a top-level value that is not an object, or a
    ``priority_entities`` value that is not a list all yield an empty set.
    Every case except the missing file is logged at WARNING.
    """
    pfile = seeds_dir(project_dir) / "data_contract_priorities.json"
    if not pfile.exists():
        _log.debug("_load_priorities: no priorities file at %s", pfile)
        return frozenset()

    try:
        raw = json.loads(pfile.read_text(encoding="utf-8"))
    except (ValueError, OSError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        _log.warning("_load_priorities: failed to read %s: %s", pfile, exc)
        return frozenset()

    # Valid JSON may still be a list, string or number, which has no ``.get``.
    if not isinstance(raw, dict):
        _log.warning("_load_priorities: top-level JSON value is not an object in %s", pfile)
        return frozenset()

    names = raw.get("priority_entities", [])
    if not isinstance(names, list):
        _log.warning("_load_priorities: priority_entities not a list in %s", pfile)
        return frozenset()

    result = frozenset(str(n) for n in names)
    _log.info("_load_priorities: loaded %d priority entities", len(result))
    return result
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # Per-file scan
236
+ # ---------------------------------------------------------------------------
237
+
238
def _scan_file(py_file: Path, project_dir: Path, *, syntax_error_sink=None, source: str | None = None) -> list[dict]:
    """Return one descriptor per entity class found anywhere in ``py_file``.

    Args:
        py_file: File to scan.
        project_dir: Base for the reported relative path; a file outside it
            is reported by its full POSIX path.
        syntax_error_sink: Optional callable forwarded to
            ``parse_python_source_or_emit_finding`` as ``emit_finding``.
        source: Pre-read file contents. When ``None`` the file is read from
            disk as UTF-8 with undecodable bytes replaced.

    Returns:
        Dicts with keys ``name``, ``kind``, ``path``, ``shape`` and
        ``serializer_shapes``, in ``ast.walk`` order (nested classes
        included). Empty when the parse helper returns ``None``.

    Raises:
        MapBuilderError: If the file has to be read and reading fails.
    """
    if source is None:
        try:
            source = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            raise MapBuilderError("Cannot read %s: %s" % (py_file, exc)) from exc

    # The same display path labels both the parse finding and each descriptor.
    try:
        display_path = py_file.relative_to(project_dir).as_posix()
    except ValueError:
        display_path = py_file.as_posix()

    # B4 (2026-04-23): replaces silent `except SyntaxError: return []`.
    module = parse_python_source_or_emit_finding(
        source,
        rel_path=display_path,
        emit_finding=syntax_error_sink,
        emitting_gate="data_contract_builder.scan_file",
        filename=str(py_file),
    )
    if module is None:
        return []

    return [
        {
            "name": class_node.name,
            "kind": _entity_kind(class_node),
            "path": display_path,
            "shape": _extract_shape(class_node),
            "serializer_shapes": _extract_serializer_shapes(class_node),
        }
        for class_node in ast.walk(module)
        if isinstance(class_node, ast.ClassDef) and _is_entity(class_node)
    ]
277
+
278
+
279
+ # ---------------------------------------------------------------------------
280
+ # Adapter dispatch (TS/JS and other non-Python languages)
281
+ # ---------------------------------------------------------------------------
282
+
283
def _collect_adapter_contract_entries(
    project_dir: Path,
    freshness: str,
    include_roots: Sequence[str] | None = None,
) -> list[DataContractEntry]:
    """Build contract entries from non-Python source adapters.

    Every file yielded by ``iter_source_files`` whose lower-cased suffix maps
    to an adapter with ``supports_contracts`` set and a language other than
    ``"python"`` is read and passed to that adapter's ``extract_contracts``.
    Each returned candidate becomes one ``DataContractEntry`` with status
    ``"inferred"``, the candidate's own confidence, and empty variant,
    transformation, writer, reader and drift fields.

    A file that cannot be read is logged at WARNING and skipped. Any other
    exception raised while reading or extracting is logged at ERROR and the
    file is skipped.
    """
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    eligible_suffixes = set()
    for extension, registered in ADAPTERS.items():
        if registered.supports_contracts and registered.language != "python":
            eligible_suffixes.add(extension)
    if not eligible_suffixes:
        return []

    collected: list[DataContractEntry] = []
    for src_file in iter_source_files(project_dir, include_roots=include_roots):
        suffix = src_file.suffix.lower()
        if suffix not in eligible_suffixes:
            continue
        adapter = ADAPTERS.get(suffix)
        if adapter is None or not adapter.supports_contracts:
            continue

        try:
            content = src_file.read_text(encoding="utf-8", errors="replace")
            candidates = adapter.extract_contracts(content, src_file)
        except OSError as exc:
            _log.warning("_collect_adapter_contract_entries: cannot read %s: %s", src_file, exc)
            continue
        except Exception as exc:  # noqa: BLE001
            _log.error("_collect_adapter_contract_entries: %s failed: %s", src_file, exc)
            continue

        try:
            file_posix = src_file.relative_to(project_dir).as_posix()
        except ValueError:
            file_posix = src_file.as_posix()

        for candidate in candidates:
            entry = DataContractEntry(
                entity=candidate.name,
                canonical_schema=file_posix,
                variants=(),
                transformations=(),
                writers=(),
                readers=(),
                drift_flags=(),
                source="ts_regex_adapter",
                evidence=("file:%s" % file_posix,),
                confidence=candidate.confidence,
                freshness=freshness,
                status="inferred",
            )
            collected.append(entry)

    _log.debug("_collect_adapter_contract_entries: %d entries", len(collected))
    return collected
335
+
336
+
337
+ # ---------------------------------------------------------------------------
338
+ # Public API
339
+ # ---------------------------------------------------------------------------
340
+
341
def build_data_contract_map(
    project_dir: Path,
    include_roots: Sequence[str] | None = None,
    *,
    syntax_error_sink=None,
    parse_cache: Any | None = None,
) -> list[DataContractEntry]:
    """Scan the target project and return its data-contract entries.

    Python files are scanned for entity classes. Definitions sharing a class
    name are grouped, and the one with the alphabetically first path becomes
    the canonical schema. Writers, readers, drift flags and serializer
    transformations are attached, then entries from non-Python adapters are
    appended. The result is sorted by entity name.

    Entities named in the project's ``data_contract_priorities.json`` seed
    file receive ``status="canonical"``; all others get ``status="inferred"``.

    Args:
        project_dir: Project root; resolved to an absolute path before use.
        include_roots: Optional root filter forwarded to the file iterators.
        syntax_error_sink: Optional callable receiving the findings emitted
            for unparseable ``.py`` files (B4, 2026-04-23). When ``None``,
            findings are collected locally and their total is logged once at
            WARNING after the scan.
        parse_cache: Optional cache object; when given, files it reports as
            unparseable are skipped in the entity pass and cached source text
            is reused instead of re-reading the file.

    Returns:
        ``DataContractEntry`` objects sorted by entity name.

    A file already reported as broken during the entity pass is left out of
    the writers/readers pass, so it is neither re-parsed nor reported twice.
    A failure of the adapter scan as a whole is logged at ERROR and does not
    abort the build.
    """
    project_dir = project_dir.resolve()
    _log.info("build_data_contract_map: scanning %s", project_dir)

    freshness = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    priority_entities = _load_priorities(project_dir)
    py_files: list[Path] = list(iter_py_files(project_dir, include_roots=include_roots))
    _log.info("build_data_contract_map: %d py files", len(py_files))

    # B4 (2026-04-23): meta sink wiring -- if no external sink is provided,
    # fall back to a local list + WARNING log so broken files are not silent.
    local_syntax_findings: list = []
    downstream_sink = syntax_error_sink if syntax_error_sink is not None else local_syntax_findings.append

    # Count emissions so the entity pass can tell which file triggered one.
    # NOTE(review): assumes the parse helper only calls the sink for a file it
    # failed to parse -- confirm against parse_python_source_or_emit_finding.
    emitted = 0

    def counting_sink(finding) -> None:
        nonlocal emitted
        emitted += 1
        downstream_sink(finding)

    raw: dict[str, list[dict]] = {}
    already_reported: set[Path] = set()
    for py_file in py_files:
        # Use parse_cache to skip unparseable files cheaply (avoid re-read + parse).
        cached_source = None
        if parse_cache is not None:
            cached = parse_cache.get_or_parse(py_file, project_dir)
            if not cached.is_parseable:
                _log.debug("build_data_contract_map: skipping unparseable (cache): %s", py_file.name)
                continue
            # Reuse cached source if available (avoids re-reading disk)
            cached_source = parse_cache.get_cached_source(py_file)

        emitted_before = emitted
        entities = _scan_file(py_file, project_dir, syntax_error_sink=counting_sink, source=cached_source)
        if emitted != emitted_before:
            already_reported.add(py_file)
        for entity in entities:
            raw.setdefault(entity["name"], []).append(entity)

    _log.info("build_data_contract_map: %d unique entities", len(raw))

    all_names = frozenset(raw)
    # Files skipped via the cache above are still passed on: the second pass is
    # the only place a finding gets emitted for them.
    second_pass_files = [f for f in py_files if f not in already_reported]
    writers_map, readers_map = _collect_writers_readers(
        second_pass_files, all_names, project_dir, syntax_error_sink=downstream_sink
    )

    if syntax_error_sink is None and local_syntax_findings:
        _log.warning(
            "build_data_contract_map: %d .py files failed to parse (meta.syntax_parse_error)",
            len(local_syntax_findings),
        )

    entries: list[DataContractEntry] = []
    for entity_name, locs in raw.items():
        locs_sorted = sorted(locs, key=lambda e: e["path"])
        canon = locs_sorted[0]
        canon_path = canon["path"]
        canon_shape: dict[str, str] = canon["shape"]

        variants_dicts = [
            {"path": loc["path"], "kind": loc["kind"], "shape": loc["shape"]}
            for loc in locs_sorted
        ]
        flags = _drift_flags(canon_shape, canon_path, variants_dicts, canon["serializer_shapes"])
        transformations = [
            {"kind": "serializer", "method": m, "output_keys": sorted(k)}
            for m, k in canon["serializer_shapes"].items()
        ]

        entries.append(DataContractEntry(
            entity=entity_name,
            canonical_schema=canon_path,
            variants=tuple(json.dumps(v, sort_keys=True) for v in variants_dicts),
            transformations=tuple(json.dumps(t, sort_keys=True) for t in transformations),
            writers=tuple(sorted(set(writers_map.get(entity_name, [])))),
            readers=tuple(sorted(set(readers_map.get(entity_name, [])))),
            drift_flags=tuple(flags),
            source=_SOURCE,
            evidence=("file:%s" % canon_path,),
            confidence=_CONFIDENCE,
            freshness=freshness,
            status="canonical" if entity_name in priority_entities else "inferred",
        ))

    # Collect contracts from TS/JS and other non-Python adapters
    try:
        adapter_entries = _collect_adapter_contract_entries(
            project_dir, freshness, include_roots=include_roots
        )
        entries.extend(adapter_entries)
        if adapter_entries:
            _log.info(
                "build_data_contract_map: +%d entries from non-Python adapters",
                len(adapter_entries),
            )
    except Exception as exc:  # noqa: BLE001
        _log.error("build_data_contract_map: adapter contract scan failed: %s", exc)

    entries.sort(key=lambda e: e.entity)
    _log.info(
        "build_data_contract_map: %d entries (%d with drift)",
        len(entries), sum(1 for e in entries if e.drift_flags),
    )
    return entries