vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,318 @@
1
+ """ML/NN correctness forensic checks (static AST, Python-only).
2
+
3
+ Catches machine-learning / quant-trading bugs that generic linters miss and that
4
+ are catastrophic in backtests and live trading:
5
+
6
+ ml.lookahead_negative_shift -- .shift(-N): future data leaks into the present row
7
+ ml.nondeterministic_split -- train_test_split(...) with no random_state
8
+ ml.scaler_fit_on_test -- .fit()/.fit_transform() on a *_test / *_val array
9
+ ml.missing_random_seed -- module uses RNG but never seeds it
10
+
11
+ Pure AST over file snapshots — never executes the model. Conservative by design:
12
+ prefers a missed case over a false alarm (these run on any Python repo, most of
13
+ which is not ML code).
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import ast
18
+ import logging
19
+
20
+ from vigil_forensic._shared import (
21
+ EvidenceReference,
22
+ GateCategory,
23
+ GateImpact,
24
+ GateSeverity,
25
+ RepairKind,
26
+ )
27
+ from vigil_forensic.gate_checks.common import build_check_result, build_finding
28
+
29
+ _log = logging.getLogger(__name__)
30
+
31
+
32
+ # --------------------------------------------------------------------------
33
+ # helpers
34
+ # --------------------------------------------------------------------------
35
+
36
def _negative_int(node: ast.AST) -> int | None:
    """Return the value of *node* when it is a strictly negative int literal.

    Two AST shapes are recognised:

    * ``-N`` as the parser produces it: ``UnaryOp(USub, Constant(N))``;
    * a bare ``Constant`` that already holds a negative int (only seen in
      hand-built trees).

    ``bool`` constants are rejected even though ``bool`` subclasses ``int``.
    Values that are not strictly below zero are rejected too: ``-0`` evaluates
    to ``0`` and must not be reported to callers as a negative amount.

    Returns ``None`` for every other node.
    """
    if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.USub):
        literal = node.operand
        negate = True
    else:
        literal = node
        negate = False
    if not isinstance(literal, ast.Constant):
        return None
    raw = literal.value
    # bool is checked first because isinstance(True, int) is True.
    if isinstance(raw, bool) or not isinstance(raw, int):
        return None
    value = -raw if negate else raw
    return value if value < 0 else None
54
+
55
+
56
def _shift_negative_period(call: ast.Call) -> int | None:
    """Return the negative literal passed to a ``shift`` call, if there is one.

    The first positional argument is examined first, followed by every
    ``periods=`` keyword in order. The first expression that
    ``_negative_int`` resolves to a value wins; ``None`` means no negative
    literal amount was found.
    """
    candidates = list(call.args[:1])
    candidates.extend(kw.value for kw in call.keywords if kw.arg == "periods")
    for expr in candidates:
        amount = _negative_int(expr)
        if amount is not None:
            return amount
    return None
68
+
69
+
70
def _arg_name_lower(node: ast.AST) -> str:
    """Return a lowercase label for an argument expression, or ``""``.

    * ``Name``       -> the identifier
    * ``Attribute``  -> the final attribute name only
    * ``Subscript``  -> the subscripted identifier, when the base is a plain
      ``Name``

    Any other expression yields the empty string.
    """
    if isinstance(node, ast.Subscript):
        base = node.value
        return base.id.lower() if isinstance(base, ast.Name) else ""
    if isinstance(node, ast.Name):
        label = node.id
    elif isinstance(node, ast.Attribute):
        label = node.attr
    else:
        return ""
    return label.lower()
79
+
80
+
81
+ # --------------------------------------------------------------------------
82
+ # check 1: look-ahead via negative shift
83
+ # --------------------------------------------------------------------------
84
+
85
def _check_lookahead_shift(path: str, tree: ast.AST) -> list:
    """Report every ``.shift(<negative int literal>)`` call found in *tree*.

    Only attribute calls named ``shift`` are inspected, and the amount is
    resolved by ``_shift_negative_period``, so non-literal amounts are ignored.
    One ``ml.lookahead_negative_shift`` finding is built per matching call;
    *path* is used for message text and evidence only.
    """
    shift_calls = (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Call)
        and isinstance(candidate.func, ast.Attribute)
        and candidate.func.attr == "shift"
    )
    findings = []
    for call in shift_calls:
        neg = _shift_negative_period(call)
        if neg is None:
            continue
        # Nodes without position info report line 0.
        ln = int(getattr(call, "lineno", 0) or 0)
        where = f"{path}:{ln}"
        summary = (
            f".shift({neg}) at {where} moves a series BACKWARD, exposing future "
            "values to the current row. This is look-ahead bias: it inflates "
            "backtest/validation metrics and silently fails in live use."
        )
        recommendation = (
            "Features must only see past data: use a forward (positive) shift, "
            "or if this is target construction, ensure the model never receives "
            "the shifted future column as an input feature."
        )
        location = EvidenceReference(
            kind="file", path=str(path), detail=f"line:{ln} shift({neg})",
        )
        finding = build_finding(
            check_id="ml.lookahead_negative_shift",
            category=GateCategory.ML,
            title=f"Negative .shift({neg}) leaks future data in {where}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=summary,
            recommendation=recommendation,
            evidence=[location],
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action="Replace negative shift with a causal (positive) shift or isolate target alignment.",
            proof_required="No negative .shift() feeds a feature column; backtest uses only past data.",
            allowlist_allowed=True,
        )
        findings.append(finding)
    return findings
121
+
122
+
123
+ # --------------------------------------------------------------------------
124
+ # check 2: non-deterministic train_test_split
125
+ # --------------------------------------------------------------------------
126
+
127
def _is_named_call(node: ast.Call, name: str) -> bool:
    """Return True when *node* calls *name*, bare (``name(...)``) or as the
    final attribute of a dotted expression (``pkg.mod.name(...)``)."""
    f = node.func
    return (isinstance(f, ast.Name) and f.id == name) or (
        isinstance(f, ast.Attribute) and f.attr == name
    )


def _split_is_reproducible(call: ast.Call) -> bool:
    """Return True when a ``train_test_split`` call cannot be proven random.

    The call is treated as reproducible when any of these holds:

    * ``random_state=`` is passed explicitly;
    * ``shuffle=False`` is passed as a literal -- without shuffling the split
      is a plain ordered cut and involves no randomness;
    * ``**kwargs`` is unpacked into the call -- ``random_state`` may be
      forwarded through it, so nothing can be concluded statically.
    """
    for kw in call.keywords:
        if kw.arg is None or kw.arg == "random_state":
            return True
        if (
            kw.arg == "shuffle"
            and isinstance(kw.value, ast.Constant)
            and kw.value.value is False
        ):
            return True
    return False


def _check_nondeterministic_split(path: str, tree: ast.AST) -> list:
    """Report ``train_test_split`` calls whose result changes between runs.

    A call is reported only when ``_split_is_reproducible`` finds no reason
    to trust it. One ``ml.nondeterministic_split`` finding is built per
    offending call; *path* is used for message text and evidence only.
    """
    findings = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call) or not _is_named_call(node, "train_test_split"):
            continue
        if _split_is_reproducible(node):
            continue
        # Nodes without position info report line 0.
        ln = int(getattr(node, "lineno", 0) or 0)
        findings.append(build_finding(
            check_id="ml.nondeterministic_split",
            category=GateCategory.ML,
            title=f"train_test_split without random_state in {path}:{ln}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"train_test_split at {path}:{ln} has no random_state. The split is "
                "non-reproducible: every run shuffles differently, so metrics, "
                "hyperparameter choices, and bug reports cannot be reproduced."
            ),
            recommendation="Pass an explicit random_state=<int> for a reproducible split.",
            evidence=[EvidenceReference(
                kind="file", path=str(path), detail=f"line:{ln}",
            )],
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action="Add random_state=<int> to train_test_split.",
            proof_required="train_test_split carries an explicit random_state.",
            allowlist_allowed=True,
        ))
    return findings
164
+
165
+
166
+ # --------------------------------------------------------------------------
167
+ # check 3: scaler / transformer fit on test or validation data (leakage)
168
+ # --------------------------------------------------------------------------
169
+
170
_FIT_METHODS = frozenset({"fit", "fit_transform"})
_LEAK_TOKENS = ("test", "val", "valid", "holdout", "oot")
# Whole words that mark an evaluation split. "validation" and "eval" are listed
# explicitly because whole-word matching no longer reaches them through the
# "val" substring.
_LEAK_WORDS = frozenset(_LEAK_TOKENS) | {"validation", "eval"}


def _names_eval_split(argname: str) -> bool:
    """Return True when *argname* contains an eval-split word as a whole piece.

    *argname* is split on underscores and surrounding digits are stripped from
    each piece, so ``x_test``, ``val_x`` and ``x_test2`` match while names that
    merely contain a token as a substring (``values``, ``latest``, ``root``,
    ``interval``) do not.
    """
    return any(
        piece.strip("0123456789") in _LEAK_WORDS for piece in argname.split("_")
    )


def _check_scaler_fit_on_test(path: str, tree: ast.AST) -> list:
    """Report ``.fit()`` / ``.fit_transform()`` calls whose first positional
    argument is named like a test/validation split.

    The argument name comes from ``_arg_name_lower`` and is matched by
    ``_names_eval_split``. A trailing ``.values`` is unwrapped first, so
    ``X_test.values`` is judged by ``X_test`` and ``X_train.values`` is not
    mistaken for eval data. One ``ml.scaler_fit_on_test`` finding is built per
    offending call; *path* is used for message text and evidence only.
    """
    findings = []
    for node in ast.walk(tree):
        if not (isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute)):
            continue
        if node.func.attr not in _FIT_METHODS or not node.args:
            continue
        target = node.args[0]
        # `.values` exposes the same data as its owner: judge the owner's name.
        if isinstance(target, ast.Attribute) and target.attr == "values":
            target = target.value
        argname = _arg_name_lower(target)
        if not argname or not _names_eval_split(argname):
            continue
        # Nodes without position info report line 0.
        ln = int(getattr(node, "lineno", 0) or 0)
        findings.append(build_finding(
            check_id="ml.scaler_fit_on_test",
            category=GateCategory.ML,
            title=f".{node.func.attr}() on '{argname}' (eval data) in {path}:{ln}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=(
                f"{node.func.attr}() is called on '{argname}' at {path}:{ln}. Fitting a "
                "scaler/transformer/model on test or validation data leaks information "
                "from the eval set into training, producing optimistic, invalid metrics."
            ),
            recommendation=(
                "Fit transforms ONLY on the training split, then .transform() (not "
                "fit_transform) the test/validation split."
            ),
            evidence=[EvidenceReference(
                kind="file", path=str(path), detail=f"line:{ln} {node.func.attr}({argname})",
            )],
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action="Fit on train only; use transform() on eval data.",
            proof_required="No fit/fit_transform on a *_test/*_val array.",
            allowlist_allowed=True,
        ))
    return findings
213
+
214
+
215
+ # --------------------------------------------------------------------------
216
+ # check 4: RNG used but never seeded (non-reproducible)
217
+ # --------------------------------------------------------------------------
218
+
219
# Calls that *consume* randomness (attribute chains ending in these), e.g.
# np.random.rand / torch.randn / random.shuffle.
_RNG_CONSUMERS = frozenset({
    "rand", "randn", "randint", "random", "choice", "shuffle", "permutation",
    "normal", "uniform", "standard_normal", "sample", "randperm",
})
# Calls that *seed* an RNG.
_SEED_CALLS = frozenset({"seed", "manual_seed", "manual_seed_all", "set_seed"})


def _attr_chain(node: ast.AST) -> str:
    """Return the dotted text of an attribute chain (best effort).

    ``np.random.rand`` yields ``"np.random.rand"``. When the chain is rooted
    in something other than a plain name (for example a call result), only
    the attribute names are included; a node that is neither an attribute nor
    a name yields ``""``.
    """
    parts: list[str] = []
    cur = node
    while isinstance(cur, ast.Attribute):
        parts.append(cur.attr)
        cur = cur.value
    if isinstance(cur, ast.Name):
        parts.append(cur.id)
    return ".".join(reversed(parts))


def _in_rng_namespace(owner: str) -> bool:
    """Return True when *owner* (the dotted expression a method is called on)
    looks like a random / numpy / torch / tensorflow namespace."""
    return (
        "random" in owner
        or owner in ("np", "numpy", "tf")
        or owner.startswith(("np.", "numpy.", "tf.", "torch"))
    )


def _check_missing_seed(path: str, tree: ast.AST) -> list:
    """Report a module that consumes randomness but never seeds it.

    A call counts as RNG consumption when its method name is in
    ``_RNG_CONSUMERS`` and the object it is called on passes
    ``_in_rng_namespace``. The method name itself is excluded from that
    namespace test, so ``rng.random()`` on an arbitrary object is not
    mistaken for the ``random`` module.

    The module counts as seeded when it calls anything in ``_SEED_CALLS``,
    either as an attribute (``np.random.seed``) or as a bare name
    (``set_seed(42)`` after a ``from ... import``), or when any call passes a
    ``random_state=`` / ``seed=`` keyword.

    Returns at most one ``ml.missing_random_seed`` finding, citing the
    smallest line number among the detected RNG uses.
    """
    rng_lines: list[int] = []
    has_seed = False
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func = node.func
        if isinstance(func, ast.Name):
            # Seed helpers are often imported and called without a namespace.
            if func.id in _SEED_CALLS:
                has_seed = True
            continue
        if not isinstance(func, ast.Attribute):
            continue
        leaf = func.attr
        if leaf in _SEED_CALLS:
            has_seed = True
        # The consumer must sit under a random/np/torch/tf namespace to avoid
        # flagging unrelated .sample()/.choice() on domain objects.
        if leaf in _RNG_CONSUMERS and _in_rng_namespace(_attr_chain(func.value)):
            rng_lines.append(int(getattr(node, "lineno", 0) or 0))
    if not rng_lines or has_seed:
        return []
    # random_state=/seed= keywords anywhere in the module also count as seeded.
    if any(
        isinstance(node, ast.keyword) and node.arg in ("random_state", "seed")
        for node in ast.walk(tree)
    ):
        return []
    # ast.walk is breadth-first, so the first hit is not necessarily the
    # earliest line; take the minimum of the known positions instead.
    known_lines = [line for line in rng_lines if line > 0]
    rng_line = min(known_lines) if known_lines else 0
    return [build_finding(
        check_id="ml.missing_random_seed",
        category=GateCategory.ML,
        title=f"RNG used but never seeded in {path}",
        severity=GateSeverity.MEDIUM,
        impact=GateImpact.REVISE,
        summary=(
            f"{path} consumes randomness (first use at line {rng_line}) but never "
            "sets a seed (np.random.seed / torch.manual_seed / random.seed) and "
            "passes no random_state=. Runs are non-reproducible — results, bugs, "
            "and metrics cannot be replicated."
        ),
        recommendation="Seed all RNGs at module/entrypoint start (np.random.seed, torch.manual_seed, random.seed) or pass random_state=.",
        evidence=[EvidenceReference(
            kind="file", path=str(path), detail=f"first RNG use line:{rng_line}",
        )],
        repair_kind=RepairKind.FIX_CONTRACT.value,
        executor_action="Seed all random number generators deterministically.",
        proof_required="A seed is set before any RNG consumption in the module.",
        allowlist_allowed=True,
    )]
290
+
291
+
292
+ # --------------------------------------------------------------------------
293
+ # runner
294
+ # --------------------------------------------------------------------------
295
+
296
def run_ml_checks(ctx) -> "object":
    """Run all ML/NN correctness checks over the snapshot corpus (static).

    Reads ``ctx.file_snapshots`` (a mapping of path -> snapshot; a missing or
    empty mapping yields an empty result). Only paths ending in ``.py`` whose
    snapshot has non-empty ``text`` are analysed. Each file is parsed once and
    handed to the four ML checks; their findings are combined into a single
    ``ml_checks`` check result.

    A file that cannot be parsed is skipped, never fatal: besides
    ``SyntaxError``, ``ast.parse`` may raise ``ValueError`` (source containing
    null bytes on older interpreters) or ``RecursionError`` (pathologically
    nested source), and one such file must not abort the whole gate.
    """
    findings: list = []
    snapshots = getattr(ctx, "file_snapshots", None) or {}
    checks = (
        _check_lookahead_shift,
        _check_nondeterministic_split,
        _check_scaler_fit_on_test,
        _check_missing_seed,
    )
    for path, snap in snapshots.items():
        if not str(path).endswith(".py"):
            continue
        content = getattr(snap, "text", None)
        if not content:
            continue
        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError, RecursionError) as exc:
            # Best effort by design: leave a trace instead of skipping silently.
            _log.debug("ml_checks: skipping unparsable %s: %s", path, exc)
            continue
        for check in checks:
            findings.extend(check(path, tree))
    return build_check_result(
        check_id="ml_checks",
        category=GateCategory.ML,
        findings=findings,
    )
@@ -0,0 +1,106 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+
5
+ from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity, RepairKind
6
+ from vigil_forensic.gate_models import PostExecGateContext
7
+ from ..source_analysis import is_source_file
8
+ from .common import build_check_result, build_finding, iter_touched_snapshots, normalize_path
9
+ from ._ast_helpers import parse_python_source_or_emit_finding
10
+ import logging
11
+ _log = logging.getLogger(__name__)
12
+
13
+
14
# Call names reported when they occur inside a loop. Entries are compared
# against both the qualified call name and its final component.
EXPENSIVE_CALL_NAMES = {
    # filesystem reads and probes
    "read_text", "read_bytes", "path_exists",
    # process spawning
    "subprocess.run", "check_output", "os.system",
    # database / network style calls
    "execute", "connect",
    # bulk file operations
    "shutil.copy", "shutil.copytree", "shutil.move",
}

# Files that ARE file-processors by design — reading files in loops is their job.
# Flagging these generates noise without actionable signal.
_FILE_PROCESSOR_PATH_FRAGMENTS = ("gate_checks/", "map_builder/", "source_adapters/")
35
+
36
+
37
def _per_iteration_nodes(loop: ast.For | ast.AsyncFor | ast.While):
    """Yield the AST nodes that are evaluated again on every pass of *loop*.

    That is the loop body, plus the condition of a ``while`` loop. The
    iterable of a ``for`` loop is evaluated once before the first iteration
    and the ``else`` clause runs at most once, so neither is included.
    """
    roots = list(loop.body)
    if isinstance(loop, ast.While):
        roots.insert(0, loop.test)
    for root in roots:
        yield from ast.walk(root)


def run_performance_checks(ctx: PostExecGateContext):
    """Report expensive calls made inside loops in touched source files.

    Snapshots are skipped when they do not exist, are not source files, are
    not performance-sensitive according to ``ctx.repo_profile`` (when a
    profile is present), live under a file-processor path, or are test files.
    Each remaining file is parsed, and every call re-executed per loop
    iteration whose name (qualified, or its last component) is in
    ``EXPENSIVE_CALL_NAMES`` produces one ``performance.expensive_in_loop``
    finding. A call nested in several loops is reported once.

    Returns the ``performance`` check result wrapping all findings, including
    any parse-error findings emitted by the parsing helper.
    """
    findings = []
    profile = ctx.repo_profile
    for snapshot in iter_touched_snapshots(ctx):
        if not snapshot.exists or not is_source_file(snapshot.path):
            continue
        if profile and not profile.is_performance_sensitive(snapshot.path):
            continue
        norm_path = snapshot.path.replace("\\", "/")
        if any(frag in norm_path for frag in _FILE_PROCESSOR_PATH_FRAGMENTS):
            continue
        # Sprint C2 (2026-04-23): prefer TestTopology.is_test_path. Legacy
        # basename check preserved as fallback for contexts where
        # ProjectContext.test_topology hasn't been built (older call sites,
        # unit tests constructing a PostExecGateContext by hand).
        topology = getattr(getattr(ctx, "project_context", None), "test_topology", None)
        if topology is not None:
            if topology.is_test_path(norm_path):
                continue
        elif norm_path.split("/")[-1].startswith("test_"):
            continue
        # B4 (2026-04-23): replaces silent `except SyntaxError: continue` —
        # meta.syntax_parse_error is now emitted on broken Python sources.
        tree = parse_python_source_or_emit_finding(
            snapshot.text,
            rel_path=normalize_path(snapshot.path),
            emit_finding=findings.append,
            emitting_gate="performance.expensive_in_loop",
        )
        if tree is None:
            continue
        # A call inside nested loops is reachable from each enclosing loop;
        # remember what was already reported so it yields a single finding.
        reported: set[int] = set()
        for node in ast.walk(tree):
            if not isinstance(node, (ast.For, ast.AsyncFor, ast.While)):
                continue
            for child in _per_iteration_nodes(node):
                if not isinstance(child, ast.Call) or id(child) in reported:
                    continue
                name = _call_name(child)
                parts = name.rsplit(".", 1)
                bare = parts[1] if len(parts) == 2 else None
                if name in EXPENSIVE_CALL_NAMES or (bare and bare in EXPENSIVE_CALL_NAMES):
                    reported.add(id(child))
                    findings.append(
                        build_finding(
                            check_id="performance.expensive_in_loop",
                            category=GateCategory.PERFORMANCE,
                            title="Touched code performs expensive work inside a loop",
                            severity=GateSeverity.HIGH,
                            impact=GateImpact.REVISE,
                            summary=f"{snapshot.path} calls '{name}' inside a loop, which is a likely hot-path anti-pattern.",
                            recommendation="Batch the work, cache repeated reads, or move the expensive call out of the loop.",
                            evidence=[EvidenceReference(kind="file", path=snapshot.path, detail=name)],
                            repair_kind=RepairKind.REFACTOR.value,
                            executor_action="Optimize hot code paths",
                            proof_required="Performance acceptable",
                            allowlist_allowed=False,
                        )
                    )
    return build_check_result(check_id="performance", category=GateCategory.PERFORMANCE, findings=findings)
94
+
95
+
96
def _call_name(node: ast.Call) -> str:
    """Describe the callee of *node* as a short string.

    * bare call ``load()``                  -> ``"load"``
    * attribute on a plain name ``a.b()``   -> ``"a.b"``
    * attribute on anything deeper, such as ``self.executor.run()``
                                            -> the final attribute, ``"run"``
    * any other callee expression           -> ``""``
    """
    target = node.func
    if isinstance(target, ast.Name):
        return str(target.id)
    if not isinstance(target, ast.Attribute):
        return ""
    owner = target.value
    if isinstance(owner, ast.Name):
        return f"{owner.id}.{target.attr}"
    return str(target.attr)