vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
"""Data contract map builder -- scans target project for entity types.
|
|
2
|
+
|
|
3
|
+
Detects: @dataclass, NamedTuple, TypedDict, pydantic.BaseModel classes.
|
|
4
|
+
Builds DataContractEntry per entity with shape, writers, readers, drift flags.
|
|
5
|
+
Generic design: operates on any target project_dir via iter_py_files.
|
|
6
|
+
No exec/eval/compile/importlib.import_module of scanned files. AST only.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import ast
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Sequence
|
|
16
|
+
|
|
17
|
+
from .map_common import iter_py_files, iter_source_files
|
|
18
|
+
from .map_errors import MapBuilderError
|
|
19
|
+
from .map_models import DataContractEntry
|
|
20
|
+
from .map_storage import seeds_dir
|
|
21
|
+
from ._ast_helpers_minimal import parse_python_source_or_emit_finding
|
|
22
|
+
|
|
23
|
+
# Only the top-level builder is exported; every helper below is module-private.
__all__ = ["build_data_contract_map"]

_log = logging.getLogger(__name__)

# Values stamped on every entry derived from the Python AST scan
# (passed as ``source`` / ``confidence`` in build_data_contract_map).
_SOURCE = "static_scan"
_CONFIDENCE = 0.85

# Dotted names, as rendered by _node_name, that mark a class as an entity.
# Matching is purely textual: aliased imports (e.g. ``import dataclasses as dc``)
# are not resolved.
_DATACLASS_DECORATORS = frozenset({"dataclass", "dataclasses.dataclass"})
_NAMEDTUPLE_BASES = frozenset({"NamedTuple", "typing.NamedTuple"})
_TYPEDDICT_BASES = frozenset({"TypedDict", "typing.TypedDict"})
_PYDANTIC_BASES = frozenset({"BaseModel", "pydantic.BaseModel"})
# Method names whose dict-literal keys are collected as the serialized shape.
_SERIALIZER_METHODS = frozenset({"to_dict", "to_json", "dict", "model_dump"})
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# AST helpers
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def _node_name(node: ast.expr) -> str:
|
|
42
|
+
if isinstance(node, ast.Name):
|
|
43
|
+
return node.id
|
|
44
|
+
if isinstance(node, ast.Attribute):
|
|
45
|
+
return "%s.%s" % (_node_name(node.value), node.attr)
|
|
46
|
+
if isinstance(node, ast.Call):
|
|
47
|
+
return _node_name(node.func)
|
|
48
|
+
return ""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _is_entity(cls: ast.ClassDef) -> bool:
    """Return True when *cls* looks like a data-contract entity.

    A class qualifies if any decorator renders to a known dataclass name, or
    if any base class renders to a known NamedTuple / TypedDict / pydantic
    base name. Matching is on the textual dotted name only.
    """
    for decorator in cls.decorator_list:
        if _node_name(decorator) in _DATACLASS_DECORATORS:
            return True
    recognised_bases = _NAMEDTUPLE_BASES | _TYPEDDICT_BASES | _PYDANTIC_BASES
    return any(_node_name(base) in recognised_bases for base in cls.bases)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _entity_kind(cls: ast.ClassDef) -> str:
    """Classify an entity class as dataclass, namedtuple, typeddict or pydantic.

    The dataclass decorator takes precedence over base classes; among bases,
    NamedTuple is checked before TypedDict. Anything else falls through to
    ``"pydantic"``, so callers are expected to have vetted the class with
    ``_is_entity`` first.
    """
    decorator_names = [_node_name(d) for d in cls.decorator_list]
    if not _DATACLASS_DECORATORS.isdisjoint(decorator_names):
        return "dataclass"

    base_names = {_node_name(b) for b in cls.bases}
    ordered_checks = (
        ("namedtuple", _NAMEDTUPLE_BASES),
        ("typeddict", _TYPEDDICT_BASES),
    )
    for kind, known_bases in ordered_checks:
        if not known_bases.isdisjoint(base_names):
            return kind
    return "pydantic"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _extract_shape(cls: ast.ClassDef) -> dict[str, str]:
|
|
70
|
+
"""Extract top-level annotated fields from class body only.
|
|
71
|
+
|
|
72
|
+
Iterates cls.body directly (not ast.walk) so that local AnnAssign
|
|
73
|
+
statements inside method bodies are never mistaken for class fields.
|
|
74
|
+
"""
|
|
75
|
+
shape: dict[str, str] = {}
|
|
76
|
+
for stmt in cls.body:
|
|
77
|
+
if isinstance(stmt, ast.AnnAssign) and isinstance(stmt.target, ast.Name):
|
|
78
|
+
try:
|
|
79
|
+
ann = ast.unparse(stmt.annotation)
|
|
80
|
+
except Exception:
|
|
81
|
+
ann = "<unknown>"
|
|
82
|
+
shape[stmt.target.id] = ann
|
|
83
|
+
return shape
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _extract_serializer_shapes(cls: ast.ClassDef) -> dict[str, list[str]]:
    """Collect string dict-literal keys used inside each serializer method.

    For every plain (non-async) method of *cls* whose name is in
    ``_SERIALIZER_METHODS``, gather the string-constant keys of every dict
    literal found anywhere in the method, nested literals included.
    Duplicates are kept. A serializer with no dict literal maps to an empty
    list.
    """
    shapes: dict[str, list[str]] = {}
    for member in cls.body:
        if isinstance(member, ast.FunctionDef) and member.name in _SERIALIZER_METHODS:
            literal_keys: list[str] = []
            for inner in ast.walk(member):
                if not isinstance(inner, ast.Dict):
                    continue
                for key in inner.keys:
                    # ``**spread`` entries appear as None keys; non-string keys are ignored.
                    if isinstance(key, ast.Constant) and isinstance(key.value, str):
                        literal_keys.append(key.value)
            shapes[member.name] = literal_keys
    return shapes
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
# Drift detection
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
def _drift_flags(
|
|
107
|
+
canonical_shape: dict[str, str],
|
|
108
|
+
canonical_path: str,
|
|
109
|
+
variants: list[dict],
|
|
110
|
+
serializer_shapes: dict[str, list[str]],
|
|
111
|
+
) -> list[str]:
|
|
112
|
+
flags: list[str] = []
|
|
113
|
+
cfields = set(canonical_shape)
|
|
114
|
+
|
|
115
|
+
for v in variants:
|
|
116
|
+
vpath = v.get("path", "")
|
|
117
|
+
if vpath == canonical_path:
|
|
118
|
+
continue
|
|
119
|
+
vfields = set(v.get("shape", {}))
|
|
120
|
+
added = vfields - cfields
|
|
121
|
+
removed = cfields - vfields
|
|
122
|
+
semantic = [f for f in cfields & vfields if canonical_shape[f] != v["shape"][f]]
|
|
123
|
+
if added:
|
|
124
|
+
flags.append("representational:extra_fields:%s:%s" % (vpath, ",".join(sorted(added))))
|
|
125
|
+
if removed:
|
|
126
|
+
flags.append("representational:missing_fields:%s:%s" % (vpath, ",".join(sorted(removed))))
|
|
127
|
+
for f in semantic:
|
|
128
|
+
flags.append("semantic:annotation_diff:%s:%s" % (vpath, f))
|
|
129
|
+
|
|
130
|
+
for method, keys in serializer_shapes.items():
|
|
131
|
+
if not keys:
|
|
132
|
+
continue
|
|
133
|
+
kset = set(keys)
|
|
134
|
+
extra = kset - cfields
|
|
135
|
+
missing = cfields - kset
|
|
136
|
+
if extra:
|
|
137
|
+
flags.append("serialization:%s:extra_keys:%s" % (method, ",".join(sorted(extra))))
|
|
138
|
+
if missing:
|
|
139
|
+
flags.append("serialization:%s:missing_keys:%s" % (method, ",".join(sorted(missing))))
|
|
140
|
+
|
|
141
|
+
return flags
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Cross-module scan
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
def _collect_writers_readers(
    py_files: list[Path],
    entity_names: frozenset[str],
    rel_base: Path,
    *,
    syntax_error_sink=None,
) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
    """Find which files construct (write) and which import (read) each entity.

    A file is a *writer* of an entity when it contains a call whose callee's
    last dotted segment equals the entity name. It is a *reader* when an
    import statement binds a name (the alias if present, otherwise the
    imported name) equal to the entity name. Paths are recorded relative to
    *rel_base* where possible, at most once per entity, in *py_files* order.

    Unreadable files are logged and skipped. Files for which
    ``parse_python_source_or_emit_finding`` returns ``None`` are skipped too;
    per the original note, that helper reports ``meta.syntax_parse_error``
    through *syntax_error_sink* so that broken files are not silently dropped.

    Returns:
        ``(writers, readers)``, each mapping every entity name to a list of
        POSIX-style paths.
    """
    writers: dict[str, list[str]] = {name: [] for name in entity_names}
    readers: dict[str, list[str]] = {name: [] for name in entity_names}

    for py_file in py_files:
        try:
            source = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.warning("_collect_writers_readers: cannot read %s: %s", py_file, exc)
            continue

        # Fall back to the path as given when the file lies outside rel_base.
        try:
            rel_path = py_file.relative_to(rel_base).as_posix()
        except ValueError:
            rel_path = py_file.as_posix()

        tree = parse_python_source_or_emit_finding(
            source,
            rel_path=rel_path,
            emit_finding=syntax_error_sink,
            emitting_gate="data_contract_builder.writers_readers",
            filename=str(py_file),
        )
        if tree is None:
            continue

        imported_entities: set[str] = set()
        called_entities: set[str] = set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                bound_names = {alias.asname or alias.name for alias in node.names}
                imported_entities |= bound_names & entity_names
            elif isinstance(node, ast.Call):
                # Compare only the last dotted segment: ``models.User(...)`` -> ``User``.
                callee = _node_name(node.func).rpartition(".")[2]
                if callee in entity_names:
                    called_entities.add(callee)

        for entity in called_entities:
            if rel_path not in writers[entity]:
                writers[entity].append(rel_path)
        for entity in imported_entities:
            if rel_path not in readers[entity]:
                readers[entity].append(rel_path)

    return writers, readers
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
# Priorities
|
|
213
|
+
# ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
def _load_priorities(project_dir: Path) -> frozenset[str]:
    """Load the set of priority entity names from the project's seeds directory.

    Reads ``data_contract_priorities.json`` from ``seeds_dir(project_dir)`` and
    returns the stringified items of its ``"priority_entities"`` list.

    This is best-effort and never raises for a bad file: a missing file, an
    unreadable or undecodable file, invalid JSON, a top-level value that is
    not a JSON object, or a ``priority_entities`` value that is not a list
    all yield an empty set (logged at DEBUG for a missing file, WARNING
    otherwise).
    """
    pfile = seeds_dir(project_dir) / "data_contract_priorities.json"
    if not pfile.exists():
        _log.debug("_load_priorities: no priorities file at %s", pfile)
        return frozenset()

    try:
        raw = json.loads(pfile.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
        # UnicodeDecodeError covers files that are not valid UTF-8.
        _log.warning("_load_priorities: failed to read %s: %s", pfile, exc)
        return frozenset()

    # Valid JSON need not be an object; guard before calling .get on it.
    if not isinstance(raw, dict):
        _log.warning("_load_priorities: top-level JSON value is not an object in %s", pfile)
        return frozenset()

    names = raw.get("priority_entities", [])
    if not isinstance(names, list):
        _log.warning("_load_priorities: priority_entities not a list in %s", pfile)
        return frozenset()

    result = frozenset(str(n) for n in names)
    _log.info("_load_priorities: loaded %d priority entities", len(result))
    return result
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
# Per-file scan
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
def _scan_file(py_file: Path, project_dir: Path, *, syntax_error_sink=None, source: str | None = None) -> list[dict]:
    """Return one record per entity class found anywhere in *py_file*.

    Args:
        py_file: file to scan.
        project_dir: base used to relativise the recorded path; the path is
            kept as given when the file lies outside it.
        syntax_error_sink: forwarded to ``parse_python_source_or_emit_finding``
            as its ``emit_finding`` argument.
        source: already-loaded file contents; when ``None`` the file is read
            from disk as UTF-8 with undecodable bytes replaced.

    Returns:
        Dicts with ``name``, ``kind``, ``path``, ``shape`` and
        ``serializer_shapes`` keys, in ``ast.walk`` order (nested classes are
        included). Empty when the parse helper returns ``None``.

    Raises:
        MapBuilderError: if the file has to be read and cannot be.
    """
    if source is None:
        try:
            source = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            raise MapBuilderError("Cannot read %s: %s" % (py_file, exc)) from exc

    try:
        rel = py_file.relative_to(project_dir).as_posix()
    except ValueError:
        rel = py_file.as_posix()

    # The parse helper replaces a former silent ``except SyntaxError: return []``.
    tree = parse_python_source_or_emit_finding(
        source,
        rel_path=rel,
        emit_finding=syntax_error_sink,
        emitting_gate="data_contract_builder.scan_file",
        filename=str(py_file),
    )
    if tree is None:
        return []

    return [
        {
            "name": node.name,
            "kind": _entity_kind(node),
            "path": rel,
            "shape": _extract_shape(node),
            "serializer_shapes": _extract_serializer_shapes(node),
        }
        for node in ast.walk(tree)
        if isinstance(node, ast.ClassDef) and _is_entity(node)
    ]
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# ---------------------------------------------------------------------------
|
|
280
|
+
# Adapter dispatch (TS/JS and other non-Python languages)
|
|
281
|
+
# ---------------------------------------------------------------------------
|
|
282
|
+
|
|
283
|
+
def _collect_adapter_contract_entries(
    project_dir: Path,
    freshness: str,
    include_roots: Sequence[str] | None = None,
) -> list[DataContractEntry]:
    """Build entries from non-Python source adapters that support contracts.

    Every file yielded by ``iter_source_files`` whose lowercase suffix maps to
    a contract-capable, non-Python adapter is read and passed to that
    adapter's ``extract_contracts``. Each returned candidate becomes an
    ``"inferred"`` entry carrying only its name, file path, confidence and
    the supplied *freshness* stamp; variants, writers, readers and drift
    flags are left empty.

    A file that cannot be read, or whose adapter raises, is logged and
    skipped so that one bad file does not abort the scan.
    """
    from .source_adapters import ADAPTERS  # noqa: PLC0415

    contract_exts: frozenset[str] = frozenset(
        suffix
        for suffix, candidate_adapter in ADAPTERS.items()
        if candidate_adapter.supports_contracts and candidate_adapter.language != "python"
    )
    if not contract_exts:
        return []

    entries: list[DataContractEntry] = []
    for src_file in iter_source_files(project_dir, include_roots=include_roots):
        suffix = src_file.suffix.lower()
        if suffix not in contract_exts:
            continue
        adapter = ADAPTERS.get(suffix)
        if adapter is None or not adapter.supports_contracts:
            continue

        try:
            content = src_file.read_text(encoding="utf-8", errors="replace")
            candidates = adapter.extract_contracts(content, src_file)
        except OSError as exc:
            _log.warning("_collect_adapter_contract_entries: cannot read %s: %s", src_file, exc)
            continue
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: adapter failures are isolated per file.
            _log.error("_collect_adapter_contract_entries: %s failed: %s", src_file, exc)
            continue

        try:
            file_posix = src_file.relative_to(project_dir).as_posix()
        except ValueError:
            file_posix = src_file.as_posix()
        file_evidence = ("file:%s" % file_posix,)

        for candidate in candidates:
            entry = DataContractEntry(
                entity=candidate.name,
                canonical_schema=file_posix,
                variants=(),
                transformations=(),
                writers=(),
                readers=(),
                drift_flags=(),
                source="ts_regex_adapter",
                evidence=file_evidence,
                confidence=candidate.confidence,
                freshness=freshness,
                status="inferred",
            )
            entries.append(entry)

    _log.debug("_collect_adapter_contract_entries: %d entries", len(entries))
    return entries
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
# ---------------------------------------------------------------------------
|
|
338
|
+
# Public API
|
|
339
|
+
# ---------------------------------------------------------------------------
|
|
340
|
+
|
|
341
|
+
def build_data_contract_map(
    project_dir: Path,
    include_roots: Sequence[str] | None = None,
    *,
    syntax_error_sink=None,
    parse_cache: Any | None = None,
) -> list[DataContractEntry]:
    """Scan the target project and return its data-contract entries.

    Python files are scanned for entity classes. Definitions sharing a class
    name are grouped, and the one with the lexicographically smallest path is
    treated as canonical. Entities named in ``data_contract_priorities.json``
    (looked up via ``seeds_dir``; the original documentation gives the
    location as ``<project>/.cortex/map_seeds/``) get ``status="canonical"``;
    all others get ``status="inferred"``. Entries from non-Python adapters
    are appended afterwards, and the result is sorted by entity name.

    Args:
        project_dir: project root; resolved to an absolute path first.
        include_roots: forwarded to the file iterators.
        syntax_error_sink: optional callable receiving
            ``meta.syntax_parse_error`` findings for broken ``.py`` files.
            When ``None``, findings are collected locally and their total
            count is logged once at WARNING.
        parse_cache: optional cache exposing ``get_or_parse`` and
            ``get_cached_source``; files it reports as unparseable are
            skipped in the entity scan.

    Raises:
        MapBuilderError: propagated from ``_scan_file`` when a Python file
            cannot be read.
    """
    project_dir = project_dir.resolve()
    _log.info("build_data_contract_map: scanning %s", project_dir)

    freshness = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    priority_entities = _load_priorities(project_dir)
    py_files: list[Path] = list(iter_py_files(project_dir, include_roots=include_roots))
    _log.info("build_data_contract_map: %d py files", len(py_files))

    # Without an external sink, collect findings locally so that broken files
    # still surface in the log instead of vanishing.
    local_syntax_findings: list = []
    if syntax_error_sink is None:
        effective_sink = local_syntax_findings.append
    else:
        effective_sink = syntax_error_sink

    definitions_by_name: dict[str, list[dict]] = {}
    for py_file in py_files:
        preloaded_source = None
        if parse_cache is not None:
            # The cache lets us skip known-bad files without re-reading them.
            cache_entry = parse_cache.get_or_parse(py_file, project_dir)
            if not cache_entry.is_parseable:
                _log.debug("build_data_contract_map: skipping unparseable (cache): %s", py_file.name)
                continue
            preloaded_source = parse_cache.get_cached_source(py_file)
        found = _scan_file(
            py_file,
            project_dir,
            syntax_error_sink=effective_sink,
            source=preloaded_source,
        )
        for definition in found:
            definitions_by_name.setdefault(definition["name"], []).append(definition)

    _log.info("build_data_contract_map: %d unique entities", len(definitions_by_name))

    writers_map, readers_map = _collect_writers_readers(
        py_files,
        frozenset(definitions_by_name),
        project_dir,
        syntax_error_sink=effective_sink,
    )

    # NOTE(review): both passes above feed the same sink, so one broken file may
    # be counted more than once here; confirm against the parse helper.
    if syntax_error_sink is None and local_syntax_findings:
        _log.warning(
            "build_data_contract_map: %d .py files failed to parse (meta.syntax_parse_error)",
            len(local_syntax_findings),
        )

    entries: list[DataContractEntry] = []
    for entity_name, definitions in definitions_by_name.items():
        ordered = sorted(definitions, key=lambda d: d["path"])
        canonical = ordered[0]
        canonical_path = canonical["path"]
        serializers = canonical["serializer_shapes"]

        variant_records = [
            {"path": d["path"], "kind": d["kind"], "shape": d["shape"]}
            for d in ordered
        ]
        drift = _drift_flags(canonical["shape"], canonical_path, variant_records, serializers)
        transformation_records = [
            {"kind": "serializer", "method": method, "output_keys": sorted(keys)}
            for method, keys in serializers.items()
        ]
        if entity_name in priority_entities:
            status = "canonical"
        else:
            status = "inferred"

        entries.append(DataContractEntry(
            entity=entity_name,
            canonical_schema=canonical_path,
            variants=tuple(json.dumps(v, sort_keys=True) for v in variant_records),
            transformations=tuple(json.dumps(t, sort_keys=True) for t in transformation_records),
            writers=tuple(sorted(set(writers_map.get(entity_name, [])))),
            readers=tuple(sorted(set(readers_map.get(entity_name, [])))),
            drift_flags=tuple(drift),
            source=_SOURCE,
            evidence=("file:%s" % canonical_path,),
            confidence=_CONFIDENCE,
            freshness=freshness,
            status=status,
        ))

    # Adapter failures are logged, never fatal, so the Python results still ship.
    try:
        adapter_entries = _collect_adapter_contract_entries(
            project_dir, freshness, include_roots=include_roots
        )
        entries.extend(adapter_entries)
        if adapter_entries:
            _log.info(
                "build_data_contract_map: +%d entries from non-Python adapters",
                len(adapter_entries),
            )
    except Exception as exc:  # noqa: BLE001
        _log.error("build_data_contract_map: adapter contract scan failed: %s", exc)

    entries.sort(key=lambda e: e.entity)
    drifted = sum(bool(e.drift_flags) for e in entries)
    _log.info(
        "build_data_contract_map: %d entries (%d with drift)",
        len(entries), drifted,
    )
    return entries
|