vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,749 @@
1
+ """TypeScript source adapter -- tree-sitter structural extractor.
2
+
3
+ ``extract_imports`` and ``extract_symbols`` are backed by tree-sitter for
4
+ true AST accuracy; all emitted IR items carry ``confidence=1.0``.
5
+
6
+ ``extract_contracts``, ``extract_runtime``, and ``extract_writer_calls``
7
+ remain on the original regex+lexer approach (separate sub-phase migration).
8
+
9
+ Capabilities (L7a scope):
10
+ - supports_structural = True (extract_imports + extract_symbols)
11
+ - supports_contracts = True (L6b -- extract_contracts)
12
+ - supports_runtime_signals = True (L6a -- extract_runtime)
13
+ - supports_authority_writes = True (L7a -- extract_writer_calls)
14
+
15
+ tree-sitter confidence:
16
+ - 1.0 for all items from extract_imports / extract_symbols.
17
+
18
+ Regex confidence scale (contracts / runtime / writer_calls unchanged):
19
+ - 0.9 -- clean absolute ES-module forms, exported symbols, etc.
20
+ - 0.8 -- relative ES-module imports, non-exported symbols.
21
+ - 0.7 -- dynamic ``import('...')`` and zod schemas.
22
+
23
+ Known limitations (explicit L2 tech-debt, do NOT fix here):
24
+ - Template-literal module specifiers (``import(`${base}/mod`)``) are
25
+ skipped -- tree-sitter argument is not a plain string literal.
26
+ - Decorators are not emitted as symbols.
27
+ - JSX inside a .tsx file is not inspected beyond its enclosing
28
+ ``export const Foo = ...`` or ``export function Foo(...)``.
29
+ - ``declare module '...'`` and ambient declarations are ignored.
30
+ """
31
+ from __future__ import annotations
32
+
33
+ import logging
34
+ from pathlib import Path
35
+
36
+ import re
37
+
38
+ from ._base import RegexAdapterBase
39
+ from ._ir import AuthorityWriteCandidate, ContractCandidate, ImportEdge, SymbolDef, TSRuntimeSignal
40
+ from ._lexer import (
41
+ join_multiline_imports,
42
+ strip_comments_and_strings,
43
+ strip_comments_only,
44
+ )
45
+ from ._patterns import (
46
+ classify_import,
47
+ )
48
+ from ._treesitter import (
49
+ iter_named_children,
50
+ node_line,
51
+ node_text,
52
+ parse_bytes,
53
+ walk_named,
54
+ )
55
+
56
+ _TS_LANGUAGE = "typescript"
57
+
58
+ __all__ = ["TypescriptAdapter"]
59
+
60
+ _log = logging.getLogger(__name__)
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Internal tree-sitter helpers (used only by extract_imports / extract_symbols)
65
+ # ---------------------------------------------------------------------------
66
+
67
def _string_module(string_node, src: bytes) -> str:
    """Return the module specifier held by a tree-sitter ``string`` node.

    The text of the first ``string_fragment`` child is preferred.  When the
    node has no such child, the node's full text is used with any leading and
    trailing single/double quote characters removed.
    """
    fragment = next(
        (kid for kid in string_node.children if kid.type == "string_fragment"),
        None,
    )
    if fragment is not None:
        return node_text(fragment, src)
    return node_text(string_node, src).strip("'\"")
78
+
79
+
80
+ def _find_dynamic_import_module(node, src: bytes) -> str | None:
81
+ """Recursively search *node* for a dynamic ``import('literal')`` call.
82
+
83
+ Returns the module specifier if the argument is a string literal, else None.
84
+ Dynamic imports with non-literal arguments are intentionally skipped.
85
+ """
86
+ if node.type == "call_expression":
87
+ for child in node.children:
88
+ if child.type == "import":
89
+ # dynamic import — extract first string argument
90
+ for sibling in node.children:
91
+ if sibling.is_named and sibling.type == "arguments":
92
+ for arg in sibling.children:
93
+ if arg.is_named and arg.type == "string":
94
+ return _string_module(arg, src)
95
+ return None # non-literal argument — skip
96
+ for child in node.children:
97
+ result = _find_dynamic_import_module(child, src)
98
+ if result is not None:
99
+ return result
100
+ return None
101
+
102
+
103
+ class TypescriptAdapter(RegexAdapterBase):
104
+ """TypeScript adapter -- ES-module imports + TS declarations via regex.
105
+
106
+ Operates on both ``.ts`` and ``.tsx``. All four ``supports_*`` flags are
107
+ enabled: imports and symbols come from tree-sitter, while contracts,
108
+ runtime signals and authority writes use the regex patterns below.
109
+ """
110
+
111
+ language = "typescript"
112
+ file_extensions = (".ts", ".tsx")
113
+ supports_structural = True
114
+ supports_contracts = True
115
+ supports_runtime_signals = True
116
+ supports_authority_writes = True
117
+
118
+ # ------------------------------------------------------------------
119
+ # Authority write patterns (L7a)
120
+ # ------------------------------------------------------------------
121
+
122
+ # fs.writeFile / fs.writeFileSync
123
+ _RE_FS_WRITE = re.compile(
124
+ r"\bfs\.writeFile(?:Sync)?\s*\(([^,)]{0,60})",
125
+ )
126
+ # standalone writeFile / writeFileSync (no fs. prefix)
127
+ _RE_STANDALONE_WRITE = re.compile(
128
+ r"(?<![.\w])writeFile(?:Sync)?\s*\(([^,)]{0,60})",
129
+ )
130
+ # fs.appendFile / fs.appendFileSync
131
+ _RE_FS_APPEND = re.compile(
132
+ r"\bfs\.appendFile(?:Sync)?\s*\(([^,)]{0,60})",
133
+ )
134
+ # .save( / .save() — on any receiver
135
+ _RE_ORM_SAVE = re.compile(
136
+ r"\.\s*save\s*\(\s*",
137
+ )
138
+ # ORM-looking .create( / .update( / .upsert( — only on repo./db./repository./model. receivers
139
+ _RE_ORM_WRITE = re.compile(
140
+ r"\b(?:repo|db|repository|model)\s*\.\s*(?:create|update|upsert)\s*\(",
141
+ )
142
+ # prisma.X.create/update/upsert/delete
143
+ _RE_PRISMA = re.compile(
144
+ r"\bprisma\s*\.\s*([A-Za-z_$][A-Za-z0-9_$]*)\s*\.\s*(create|update|upsert|delete)\s*\(",
145
+ )
146
+ # supabase.from(...).insert/update/upsert/delete
147
+ _RE_SUPABASE = re.compile(
148
+ r"\bsupabase\s*\.\s*from\s*\([^)]{0,60}\)\s*\.\s*(insert|update|upsert|delete)\s*\(",
149
+ )
150
+ # localStorage.setItem / sessionStorage.setItem
151
+ _RE_STORAGE_SET = re.compile(
152
+ r"\b(?:localStorage|sessionStorage)\s*\.\s*setItem\s*\(",
153
+ )
154
+
155
+ # ------------------------------------------------------------------
156
+ # Runtime signal patterns (L6a)
157
+ # ------------------------------------------------------------------
158
+
159
+ # A. Next.js App Router: named export of HTTP method (function form)
160
+ _RE_APP_ROUTER_FN = re.compile(
161
+ r"^export\s+(async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\b",
162
+ re.MULTILINE,
163
+ )
164
+ # A2. Next.js App Router: const export of HTTP method
165
+ _RE_APP_ROUTER_CONST = re.compile(
166
+ r"^export\s+const\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s*=",
167
+ re.MULTILINE,
168
+ )
169
+ # B. Next.js Pages Router API: export default function/arrow
170
+ _RE_PAGES_API_DEFAULT = re.compile(
171
+ r"^export\s+default\s+(async\s+)?(?:function\b|\()",
172
+ re.MULTILINE,
173
+ )
174
+ # C. Next.js middleware: named middleware function/const/default
175
+ _RE_MIDDLEWARE = re.compile(
176
+ r"^export\s+(?:(?:async\s+)?function\s+middleware\b|(?:const|default)\s+middleware\b)",
177
+ re.MULTILINE,
178
+ )
179
+ # D. Server bootstrap / module_init
180
+ _RE_BOOTSTRAP = re.compile(
181
+ r"^(?:(?:app|server)\.(?:listen|start)\s*\(|createServer\s*\(|new\s+(?:http\.Server|https\.Server)\s*\()",
182
+ re.MULTILINE,
183
+ )
184
+ # E. Background job / cron (top-level lines starting with identifier or export)
185
+ _RE_BACKGROUND = re.compile(
186
+ r"^(?:export\s+)?(?:cron|schedule|setInterval|setTimeout)\s*\(",
187
+ re.MULTILINE,
188
+ )
189
+ # F. Environment variable access
190
+ _RE_ENV_ACCESS = re.compile(
191
+ r"process\.env\.([A-Z_][A-Z0-9_]*)",
192
+ )
193
+
194
+ # ------------------------------------------------------------------
195
+ # Contract patterns (L6b)
196
+ # ------------------------------------------------------------------
197
+
198
+ # A. interface X { ... } — any combo of export / declare modifiers
199
+ _RE_CONTRACT_INTERFACE = re.compile(
200
+ r"^(?:export\s+)?(?:declare\s+)?interface\s+"
201
+ r"([A-Za-z_$][A-Za-z0-9_$]*)(?:\s*<[^{]*>)?\s*\{",
202
+ re.MULTILINE,
203
+ )
204
+ # B. type X = { ... } — object-literal shape only (must end with `= {`)
205
+ _RE_CONTRACT_TYPE_OBJECT = re.compile(
206
+ r"^(?:export\s+)?(?:declare\s+)?type\s+"
207
+ r"([A-Za-z_$][A-Za-z0-9_$]*)(?:\s*<[^=]*>)?\s*=\s*\{",
208
+ re.MULTILINE,
209
+ )
210
+ # C. const/let/var X = z.object(...)
211
+ _RE_CONTRACT_ZOD = re.compile(
212
+ r"(?:export\s+)?(?:const|let|var)\s+"
213
+ r"([A-Za-z_$][A-Za-z0-9_$]*)\s*=\s*z\.object\s*\(",
214
+ re.MULTILINE,
215
+ )
216
+
217
+ # ------------------------------------------------------------------
218
+ # Contracts (L6b)
219
+ # ------------------------------------------------------------------
220
+
221
def extract_contracts(
    self, content: str, path: Path
) -> list[ContractCandidate]:
    """Collect type-contract candidates declared in a TS/TSX source.

    Recognised shapes:
      - ``interface X { }``                   -> kind="interface", confidence=0.9
      - ``type X = { }`` (object literal)     -> kind="type_object", confidence=0.8
      - ``const X = z.object(...)``           -> kind="zod_schema", confidence=0.7

    Nothing is returned for test files (``*.test.ts[x]``, ``*.spec.ts[x]``,
    or any path containing ``__tests__/``).  Scalar aliases and union /
    intersection aliases are never reported because the ``type`` pattern
    requires ``= {``.

    Matching runs on the output of ``strip_comments_and_strings`` so that
    declarations mentioned inside comments or string literals are ignored.

    The result is ordered by ``(line, name)``.
    """
    _log.debug("extract_contracts: %s (%d chars)", path, len(content))

    # Test-file exclusion
    source_path = Path(path)
    test_suffixes = (".test.ts", ".spec.ts", ".test.tsx", ".spec.tsx")
    if source_path.name.endswith(test_suffixes):
        return []
    if "__tests__/" in source_path.as_posix():
        return []

    cleaned = strip_comments_and_strings(content, self.language)

    # (pattern, contract_kind, confidence) -- listed in emission order, which
    # the stable sort below preserves for candidates with equal keys.
    rules = (
        (self._RE_CONTRACT_INTERFACE, "interface", 0.9),
        (self._RE_CONTRACT_TYPE_OBJECT, "type_object", 0.8),
        (self._RE_CONTRACT_ZOD, "zod_schema", 0.7),
    )

    candidates: list[ContractCandidate] = []
    for pattern, kind, score in rules:
        candidates.extend(
            ContractCandidate(
                name=hit.group(1),
                contract_kind=kind,
                line=self._line_of(hit.start(), cleaned),
                confidence=score,
            )
            for hit in pattern.finditer(cleaned)
        )

    candidates.sort(key=lambda c: (c.line, c.name))
    _log.debug("extract_contracts: %s -> %d candidates", path, len(candidates))
    return candidates
284
+
285
+ # ------------------------------------------------------------------
286
+ # Authority writes (L7a)
287
+ # ------------------------------------------------------------------
288
+
289
def extract_writer_calls(
    self, content: str, path: Path
) -> list[AuthorityWriteCandidate]:
    """Detect write/save operations in TS/JS source.

    Detected patterns and confidence:
      - ``fs.writeFile(``, ``fs.writeFileSync(``         -> fs_write, 0.9
      - ``fs.appendFile(``, ``fs.appendFileSync(``       -> fs_append, 0.9
      - standalone ``writeFile(``, ``writeFileSync(``    -> fs_write, 0.85
      - ``prisma.X.create/update/upsert/delete(``        -> prisma_write, 0.85
      - ``supabase.from(...).insert/update/...``         -> supabase_write, 0.85
      - ``.save(``                                       -> orm_save, 0.75
      - ``repo.create(``, ``db.update(`` etc.            -> orm_write, 0.7
      - ``localStorage.setItem(``, ``sessionStorage.setItem(`` -> storage_write, 0.7

    ``target_hint`` is built from capture group 1 of the matching pattern:
    the trimmed first-argument fragment for the ``fs`` forms, the model name
    for prisma, and the captured operation name for supabase.  It is empty
    for the remaining kinds.  Hints are capped at 30 characters.

    HTTP .put() / .post() on fetch/axios are intentionally excluded (too noisy).
    Uses ``_preprocess`` before matching.
    Sorted by ``(line, write_kind)``.
    """
    _log.debug("extract_writer_calls: %s (%d chars)", path, len(content))
    # Normalise through Path() like the sibling extractors, so a plain-string
    # path does not raise AttributeError on .as_posix().
    path_posix = Path(path).as_posix()
    cleaned = self._preprocess(content)

    def _first_arg_hint(match) -> str:
        """Trim the first-argument fragment to 30 chars as target hint."""
        return match.group(1).strip().strip("\"'`").strip()[:30]

    def _group_hint(match) -> str:
        """Use capture group 1 verbatim, capped at 30 chars."""
        return match.group(1)[:30]

    def _no_hint(match) -> str:
        """Pattern carries no usable target information."""
        return ""

    # (pattern, write_kind, confidence, hint builder).  The order matches the
    # original emission order; the final sort is stable, so candidates with
    # the same (line, write_kind) keep this relative order.  The standalone
    # writeFile pattern uses a negative lookbehind, so ``fs.writeFile`` is
    # not double-counted.
    rules = (
        (self._RE_FS_WRITE, "fs_write", 0.9, _first_arg_hint),
        (self._RE_FS_APPEND, "fs_append", 0.9, _first_arg_hint),
        (self._RE_STANDALONE_WRITE, "fs_write", 0.85, _first_arg_hint),
        (self._RE_PRISMA, "prisma_write", 0.85, _group_hint),
        (self._RE_SUPABASE, "supabase_write", 0.85, _group_hint),
        (self._RE_ORM_SAVE, "orm_save", 0.75, _no_hint),
        (self._RE_ORM_WRITE, "orm_write", 0.7, _no_hint),
        (self._RE_STORAGE_SET, "storage_write", 0.7, _no_hint),
    )

    candidates: list[AuthorityWriteCandidate] = []
    for pattern, write_kind, score, build_hint in rules:
        for hit in pattern.finditer(cleaned):
            candidates.append(AuthorityWriteCandidate(
                write_kind=write_kind,
                target_hint=build_hint(hit),
                line=self._line_of(hit.start(), cleaned),
                confidence=score,
            ))

    candidates.sort(key=lambda c: (c.line, c.write_kind))
    _log.debug("extract_writer_calls: %s -> %d candidates", path_posix, len(candidates))
    return candidates
394
+
395
+ # ------------------------------------------------------------------
396
+ # Preprocess hook — strip comments/strings, then collapse multiline imports
397
+ # ------------------------------------------------------------------
398
+
399
def _preprocess(self, content: str) -> str:
    """Run the shared lexer steps in order: strip comments and strings, then
    join multiline imports, both for ``self.language``."""
    return join_multiline_imports(
        strip_comments_and_strings(content, self.language),
        self.language,
    )
403
+
404
+ # ------------------------------------------------------------------
405
+ # Runtime signals (L6a)
406
+ # ------------------------------------------------------------------
407
+
408
def extract_runtime(self, content: str, path: Path) -> list[TSRuntimeSignal]:  # type: ignore[override]
    """Detect Next.js routes, middleware, server bootstrap, background jobs,
    and env-var accesses in TS/TSX files.

    Matching runs on the output of ``strip_comments_only`` (comments removed,
    string bodies kept).

    Path-based exclusions (an empty list is returned, no JSX parsing):
      - Test files: ``*.test.ts``, ``*.spec.ts``, ``*.test.tsx``,
        ``*.spec.tsx``, and paths containing ``__tests__/``.
      - UI component directories: paths containing ``components/``, or
        paths containing ``app/`` / ``pages/`` that do NOT contain
        ``app/api/`` / ``pages/api/``.

    Emitted signal kinds:
      - ``framework_route`` (0.9) -- App Router HTTP-method exports under
        ``app/api/``; otherwise ``export default`` under ``pages/api/``
        with ``http_methods == ["*"]``.
      - ``middleware`` (0.9)      -- only in files whose stem is ``middleware``.
      - ``module_init`` (0.7)     -- server bootstrap calls.
      - ``background_job`` (0.7)  -- cron / schedule / timer calls.
      - ``env_access`` (0.9)      -- one per ``process.env.NAME``, first
        occurrence only.

    Sorted output by ``(line, kind)``.
    """
    _log.debug("extract_runtime: %s (%d chars)", path, len(content))

    source_path = Path(path)
    path_posix = source_path.as_posix()

    # ------ Exclusion guards ------
    # Test files.  The .tsx variants are included because this adapter also
    # handles .tsx sources and extract_contracts excludes them the same way.
    test_suffixes = (".test.ts", ".spec.ts", ".test.tsx", ".spec.tsx")
    if source_path.name.endswith(test_suffixes):
        return []
    if "__tests__/" in path_posix:
        return []

    # NOTE(review): the directory guards below are substring tests, so a
    # directory such as ``webapp/`` also satisfies the ``app/`` check --
    # confirm whether path-segment matching is intended.
    is_app_api = "app/api/" in path_posix
    is_pages_api = "pages/api/" in path_posix

    # UI component directories (not api paths)
    if "components/" in path_posix:
        return []
    if "pages/" in path_posix and not is_pages_api:
        return []
    if "app/" in path_posix and not is_app_api:
        return []

    # ------ Preprocessing ------
    cleaned = strip_comments_only(content, self.language)

    signals: list[TSRuntimeSignal] = []

    def _add(kind: str, line: int, confidence: float, payload: dict) -> None:
        """Append one signal for this file."""
        signals.append(TSRuntimeSignal(
            kind=kind,
            file=path_posix,
            line=line,
            confidence=confidence,
            payload=payload,
        ))

    def _add_route(match, methods: list) -> None:
        """Append a Next.js ``framework_route`` signal at *match*."""
        _add(
            "framework_route",
            self._line_of(match.start(), cleaned),
            0.9,
            {
                "route_path": path_posix,
                "http_methods": methods,
                "framework": "nextjs",
            },
        )

    # ------ A/B. App Router / Pages API routes ------
    if is_app_api:
        # Function-form exports: group 1 is the optional ``async``, group 2
        # the HTTP method.
        for m in self._RE_APP_ROUTER_FN.finditer(cleaned):
            _add_route(m, [m.group(2)])
        # Const-form exports: group 1 is the HTTP method.
        for m in self._RE_APP_ROUTER_CONST.finditer(cleaned):
            _add_route(m, [m.group(1)])
    elif is_pages_api:
        # Pages Router: a default export handles every method.
        for m in self._RE_PAGES_API_DEFAULT.finditer(cleaned):
            _add_route(m, ["*"])

    # ------ C. Middleware ------
    # Only files named middleware.<ext> are considered.
    if source_path.stem == "middleware":
        for m in self._RE_MIDDLEWARE.finditer(cleaned):
            _add(
                "middleware",
                self._line_of(m.start(), cleaned),
                0.9,
                {"framework": "nextjs"},
            )

    # ------ D. Server bootstrap / E. Background job ------
    call_rules = (
        (self._RE_BOOTSTRAP, "module_init"),
        (self._RE_BACKGROUND, "background_job"),
    )
    for pattern, kind in call_rules:
        for m in pattern.finditer(cleaned):
            _add(
                kind,
                self._line_of(m.start(), cleaned),
                0.7,
                {"call": m.group(0).strip()},
            )

    # ------ F. Env access (deduplicate per var name, keep first occurrence) ------
    first_seen: dict[str, int] = {}
    for m in self._RE_ENV_ACCESS.finditer(cleaned):
        var_name = m.group(1)
        if var_name not in first_seen:
            first_seen[var_name] = self._line_of(m.start(), cleaned)

    for var_name, line in sorted(first_seen.items(), key=lambda kv: kv[1]):
        _add("env_access", line, 0.9, {"env_var": var_name})

    signals.sort(key=lambda s: (s.line, s.kind))
    _log.debug("extract_runtime: %s -> %d signals", path, len(signals))
    return signals
561
+
562
+ # ------------------------------------------------------------------
563
+ # Structural: imports (tree-sitter, confidence=1.0)
564
+ # ------------------------------------------------------------------
565
+
566
def extract_imports(self, content: str, path: Path) -> list[ImportEdge]:
    """Return one ImportEdge per ES-module / dynamic import statement.

    Backed by tree-sitter.  All items carry ``confidence=1.0``.

    Handled forms:
      ``import X from 'Y'``              -- default import
      ``import { A, B } from 'Y'``       -- named imports
      ``import * as X from 'Y'``         -- namespace import
      ``import 'Y'``                     -- side-effect import
      ``import type X from 'Y'``         -- type-only default import
      ``import type { X } from 'Y'``     -- type-only named imports
      ``export { A, B } from 'Y'``       -- re-export named
      ``export * from 'Y'``              -- re-export star
      ``export * as NS from 'Y'``        -- re-export namespace
      Dynamic ``import('Y')`` (literal)  -- dynamic import

    Static import/export statements are read from the root's direct
    children only.  Dynamic imports are collected from the whole tree and
    each one is reported at the line of its own ``import(...)`` call, so an
    import nested inside a multi-line call is neither attributed to the
    enclosing call's line nor reported twice.

    Dynamic imports with non-literal arguments are silently skipped.
    Edges are de-duplicated on ``(line, module)`` and sorted by
    ``(line, to_module, kind)``.
    """
    _log.debug("extract_imports (tree-sitter): %s (%d chars)", path, len(content))
    src: bytes = content.encode("utf-8", errors="replace")
    root = parse_bytes(_TS_LANGUAGE, src)
    from_path = Path(path).as_posix()

    edges: list[ImportEdge] = []
    seen: set[tuple[int, str]] = set()

    def _emit(module: str, line: int) -> None:
        """Record an edge unless the module is empty or already seen."""
        key = (line, module)
        if not module or key in seen:
            return
        seen.add(key)
        edges.append(ImportEdge(
            from_file=from_path,
            to_module=module,
            kind=classify_import(module),
            line=line,
            confidence=1.0,
        ))

    def _first_string_child(node):
        """Return the first named ``string`` child of *node*, or None."""
        return next(
            (kid for kid in node.children if kid.is_named and kid.type == "string"),
            None,
        )

    def _literal_dynamic_imports(start):
        """Yield ``(module, line)`` for each literal ``import(...)`` at or
        below *start*.  Iterative, so deep trees cannot hit the recursion
        limit."""
        pending = [start]
        while pending:
            current = pending.pop()
            kids = current.children
            is_import_call = current.type == "call_expression" and any(
                kid.type == "import" for kid in kids
            )
            if not is_import_call:
                pending.extend(kids)
                continue
            # Dynamic import: take the first string argument, if any.  A
            # call with only non-literal arguments is skipped and, as
            # before, its arguments are not searched further.
            for part in kids:
                if not (part.is_named and part.type == "arguments"):
                    continue
                literal = _first_string_child(part)
                if literal is not None:
                    yield _string_module(literal, src), node_line(current)
                    break

    # -----------------------------------------------------------
    # Pass 1: static import/export statements among the root's children.
    # A re-export is an ``export_statement`` with a direct ``string`` child;
    # export statements without one contribute nothing here.
    # -----------------------------------------------------------
    for node in root.children:
        if not node.is_named:
            continue
        if node.type not in ("import_statement", "export_statement"):
            continue
        specifier = _first_string_child(node)
        if specifier is not None:
            _emit(_string_module(specifier, src), node_line(node))

    # -----------------------------------------------------------
    # Pass 2: dynamic ``import('literal')`` calls anywhere in the tree
    # (function bodies, class methods, conditionals, call arguments, ...).
    # -----------------------------------------------------------
    for module, line in _literal_dynamic_imports(root):
        _emit(module, line)

    edges.sort(key=lambda e: (e.line, e.to_module, e.kind))
    return edges
649
+
650
+ # ------------------------------------------------------------------
651
+ # Structural: symbols (tree-sitter, confidence=1.0)
652
+ # ------------------------------------------------------------------
653
+
654
def extract_symbols(self, content: str, path: Path) -> list[SymbolDef]:
    """Return one SymbolDef per top-level declaration in *content*.

    Backed by tree-sitter.  All items carry ``confidence=1.0`` and the
    result is sorted by ``(line, name)``.

    Detected kinds:
      class     -- ``class_declaration`` / ``abstract_class_declaration``
      interface -- ``interface_declaration``
      type      -- ``type_alias_declaration``
      enum      -- ``enum_declaration``
      function  -- ``function_declaration``
      const     -- ``lexical_declaration`` / ``variable_declaration``

    Visibility:
      - ``"public"``  -- declaration is wrapped in an ``export_statement``
      - ``"module"``  -- declaration is not exported

    Only declarators that bind a plain identifier produce a ``const``
    symbol.  Destructuring declarators (``const { a } = cfg``,
    ``const [x] = arr``) emit nothing; in particular the initializer
    expression is never reported as a declared symbol.

    Args:
        content: Source text to analyse.
        path: Location of the source; used only for the debug log line.

    Returns:
        The collected symbols, ordered by line then name.
    """
    _log.debug("extract_symbols (tree-sitter): %s (%d chars)", path, len(content))
    src: bytes = content.encode("utf-8", errors="replace")
    root = parse_bytes(_TS_LANGUAGE, src)

    syms: list[SymbolDef] = []

    # Declaration node type -> (node type holding the name, symbol kind).
    # Class, interface and type-alias names are ``type_identifier`` nodes;
    # enum names are plain ``identifier`` nodes in the TS grammar.
    named_decls: dict[str, tuple[str, str]] = {
        "class_declaration": ("type_identifier", "class"),
        "abstract_class_declaration": ("type_identifier", "class"),
        "interface_declaration": ("type_identifier", "interface"),
        "type_alias_declaration": ("type_identifier", "type"),
        "enum_declaration": ("identifier", "enum"),
    }

    def _emit(name: str, kind: str, line: int, exported: bool) -> None:
        syms.append(SymbolDef(
            name=name,
            kind=kind,
            line=line,
            visibility="public" if exported else "module",
            confidence=1.0,
        ))

    def _process_declaration(decl_node, exported: bool) -> None:
        """Extract symbol(s) from a single declaration node."""
        t = decl_node.type

        if t in named_decls:
            name_type, kind = named_decls[t]
            # First direct named child of the expected type is the name.
            for child in decl_node.children:
                if child.is_named and child.type == name_type:
                    _emit(node_text(child, src), kind, node_line(decl_node), exported)
                    break

        elif t == "function_declaration":
            for child in iter_named_children(decl_node, "identifier"):
                _emit(node_text(child, src), "function", node_line(decl_node), exported)
                break

        elif t in ("lexical_declaration", "variable_declaration"):
            for var_decl in iter_named_children(decl_node, "variable_declarator"):
                for child in var_decl.children:
                    if not child.is_named or child.type == "comment":
                        continue
                    # The first named child of a declarator is its binding
                    # target.  Stop here unconditionally: scanning further
                    # would reach the initializer, and for a destructuring
                    # declarator such as ``const { a } = cfg`` the first
                    # ``identifier`` found would be ``cfg`` -- a value that
                    # is used, not a symbol that is declared.
                    if child.type == "identifier":
                        _emit(
                            node_text(child, src),
                            "const",
                            node_line(decl_node),
                            exported,
                        )
                    break  # one declarator -> at most one symbol

    for node in root.children:
        if not node.is_named:
            continue

        if node.type == "export_statement":
            # The wrapped declaration is a named child of the export node.
            for child in node.children:
                if not child.is_named:
                    continue
                _process_declaration(child, exported=True)
        else:
            _process_declaration(node, exported=False)

    syms.sort(key=lambda s: (s.line, s.name))
    return syms