thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +38 -25
  23. src/core/base.py +7 -2
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +5 -2
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +120 -20
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +104 -10
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +54 -11
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +5 -4
  73. src/linters/dry/file_analyzer.py +4 -2
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +183 -48
  76. src/linters/dry/python_analyzer.py +60 -439
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/token_hasher.py +116 -112
  80. src/linters/dry/typescript_analyzer.py +68 -382
  81. src/linters/dry/typescript_constant_extractor.py +138 -0
  82. src/linters/dry/typescript_statement_detector.py +255 -0
  83. src/linters/dry/typescript_value_extractor.py +70 -0
  84. src/linters/dry/violation_builder.py +4 -0
  85. src/linters/dry/violation_filter.py +5 -4
  86. src/linters/dry/violation_generator.py +71 -14
  87. src/linters/file_header/atemporal_detector.py +68 -50
  88. src/linters/file_header/base_parser.py +93 -0
  89. src/linters/file_header/bash_parser.py +66 -0
  90. src/linters/file_header/config.py +90 -16
  91. src/linters/file_header/css_parser.py +70 -0
  92. src/linters/file_header/field_validator.py +36 -33
  93. src/linters/file_header/linter.py +140 -144
  94. src/linters/file_header/markdown_parser.py +130 -0
  95. src/linters/file_header/python_parser.py +14 -58
  96. src/linters/file_header/typescript_parser.py +73 -0
  97. src/linters/file_header/violation_builder.py +13 -12
  98. src/linters/file_placement/config_loader.py +3 -1
  99. src/linters/file_placement/directory_matcher.py +4 -0
  100. src/linters/file_placement/linter.py +66 -34
  101. src/linters/file_placement/pattern_matcher.py +41 -6
  102. src/linters/file_placement/pattern_validator.py +31 -12
  103. src/linters/file_placement/rule_checker.py +12 -7
  104. src/linters/lazy_ignores/__init__.py +43 -0
  105. src/linters/lazy_ignores/config.py +74 -0
  106. src/linters/lazy_ignores/directive_utils.py +164 -0
  107. src/linters/lazy_ignores/header_parser.py +177 -0
  108. src/linters/lazy_ignores/linter.py +158 -0
  109. src/linters/lazy_ignores/matcher.py +168 -0
  110. src/linters/lazy_ignores/python_analyzer.py +209 -0
  111. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  112. src/linters/lazy_ignores/skip_detector.py +298 -0
  113. src/linters/lazy_ignores/types.py +71 -0
  114. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  115. src/linters/lazy_ignores/violation_builder.py +135 -0
  116. src/linters/lbyl/__init__.py +31 -0
  117. src/linters/lbyl/config.py +63 -0
  118. src/linters/lbyl/linter.py +67 -0
  119. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  120. src/linters/lbyl/pattern_detectors/base.py +63 -0
  121. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  122. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  123. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  124. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  125. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  126. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  127. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  128. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  129. src/linters/lbyl/python_analyzer.py +215 -0
  130. src/linters/lbyl/violation_builder.py +354 -0
  131. src/linters/magic_numbers/context_analyzer.py +227 -225
  132. src/linters/magic_numbers/linter.py +28 -82
  133. src/linters/magic_numbers/python_analyzer.py +4 -16
  134. src/linters/magic_numbers/typescript_analyzer.py +9 -12
  135. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  136. src/linters/method_property/__init__.py +49 -0
  137. src/linters/method_property/config.py +138 -0
  138. src/linters/method_property/linter.py +414 -0
  139. src/linters/method_property/python_analyzer.py +473 -0
  140. src/linters/method_property/violation_builder.py +119 -0
  141. src/linters/nesting/linter.py +24 -16
  142. src/linters/nesting/python_analyzer.py +4 -0
  143. src/linters/nesting/typescript_analyzer.py +6 -12
  144. src/linters/nesting/violation_builder.py +1 -0
  145. src/linters/performance/__init__.py +91 -0
  146. src/linters/performance/config.py +43 -0
  147. src/linters/performance/constants.py +49 -0
  148. src/linters/performance/linter.py +149 -0
  149. src/linters/performance/python_analyzer.py +365 -0
  150. src/linters/performance/regex_analyzer.py +312 -0
  151. src/linters/performance/regex_linter.py +139 -0
  152. src/linters/performance/typescript_analyzer.py +236 -0
  153. src/linters/performance/violation_builder.py +160 -0
  154. src/linters/print_statements/config.py +7 -12
  155. src/linters/print_statements/linter.py +26 -43
  156. src/linters/print_statements/python_analyzer.py +91 -93
  157. src/linters/print_statements/typescript_analyzer.py +15 -25
  158. src/linters/print_statements/violation_builder.py +12 -14
  159. src/linters/srp/class_analyzer.py +11 -7
  160. src/linters/srp/heuristics.py +56 -22
  161. src/linters/srp/linter.py +15 -16
  162. src/linters/srp/python_analyzer.py +55 -20
  163. src/linters/srp/typescript_metrics_calculator.py +110 -50
  164. src/linters/stateless_class/__init__.py +25 -0
  165. src/linters/stateless_class/config.py +58 -0
  166. src/linters/stateless_class/linter.py +349 -0
  167. src/linters/stateless_class/python_analyzer.py +290 -0
  168. src/linters/stringly_typed/__init__.py +36 -0
  169. src/linters/stringly_typed/config.py +189 -0
  170. src/linters/stringly_typed/context_filter.py +451 -0
  171. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  172. src/linters/stringly_typed/ignore_checker.py +100 -0
  173. src/linters/stringly_typed/ignore_utils.py +51 -0
  174. src/linters/stringly_typed/linter.py +376 -0
  175. src/linters/stringly_typed/python/__init__.py +33 -0
  176. src/linters/stringly_typed/python/analyzer.py +348 -0
  177. src/linters/stringly_typed/python/call_tracker.py +175 -0
  178. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  179. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  180. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  181. src/linters/stringly_typed/python/constants.py +21 -0
  182. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  183. src/linters/stringly_typed/python/validation_detector.py +189 -0
  184. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  185. src/linters/stringly_typed/storage.py +620 -0
  186. src/linters/stringly_typed/storage_initializer.py +45 -0
  187. src/linters/stringly_typed/typescript/__init__.py +28 -0
  188. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  189. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  190. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  191. src/linters/stringly_typed/violation_generator.py +419 -0
  192. src/orchestrator/core.py +252 -14
  193. src/orchestrator/language_detector.py +5 -3
  194. src/templates/thailint_config_template.yaml +196 -0
  195. src/utils/project_root.py +3 -0
  196. thailint-0.15.3.dist-info/METADATA +187 -0
  197. thailint-0.15.3.dist-info/RECORD +226 -0
  198. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  199. src/cli.py +0 -1665
  200. thailint-0.5.0.dist-info/METADATA +0 -1286
  201. thailint-0.5.0.dist-info/RECORD +0 -96
  202. thailint-0.5.0.dist-info/entry_points.txt +0 -4
  203. {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
  204. {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
@@ -19,6 +19,11 @@ Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, c
19
19
  Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
20
20
  single statement detection using tree-sitter AST patterns, and interface filtering logic
21
21
 
22
+ Suppressions:
23
+ - type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
24
+ - invalid-name: Node type alias follows tree-sitter naming convention
25
+ - srp.violation: Complex tree-sitter AST analysis algorithm. See SRP Exception below.
26
+
22
27
  SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
23
28
  Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
24
29
  false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
@@ -30,15 +35,17 @@ SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds
30
35
  responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
31
36
  """
32
37
 
33
- from collections.abc import Generator
38
+ from collections.abc import Iterable
34
39
  from pathlib import Path
35
40
 
36
41
  from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
37
42
 
43
+ from . import token_hasher
38
44
  from .base_token_analyzer import BaseTokenAnalyzer
39
45
  from .block_filter import BlockFilterRegistry, create_default_registry
40
46
  from .cache import CodeBlock
41
47
  from .config import DRYConfig
48
+ from .typescript_statement_detector import is_single_statement, should_include_block
42
49
 
43
50
  if TREE_SITTER_AVAILABLE:
44
51
  from tree_sitter import Node
@@ -84,16 +91,33 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
84
91
  # Generate rolling hash windows
85
92
  windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
86
93
 
87
- blocks = []
88
- for hash_val, start_line, end_line, snippet in windows:
89
- # Filter interface/type definitions
90
- if not self._should_include_block(content, start_line, end_line):
91
- continue
94
+ # Filter out interface/type definitions and single statement patterns
95
+ valid_windows = (
96
+ (hash_val, start_line, end_line, snippet)
97
+ for hash_val, start_line, end_line, snippet in windows
98
+ if should_include_block(content, start_line, end_line)
99
+ and not is_single_statement(content, start_line, end_line)
100
+ )
101
+ return self._build_blocks(valid_windows, file_path, content)
102
+
103
+ def _build_blocks(
104
+ self,
105
+ windows: Iterable[tuple[int, int, int, str]],
106
+ file_path: Path,
107
+ content: str,
108
+ ) -> list[CodeBlock]:
109
+ """Build CodeBlock objects from valid windows, applying filters.
92
110
 
93
- # Filter single statement patterns
94
- if self._is_single_statement_in_source(content, start_line, end_line):
95
- continue
111
+ Args:
112
+ windows: Iterable of (hash_val, start_line, end_line, snippet) tuples
113
+ file_path: Path to source file
114
+ content: File content
96
115
 
116
+ Returns:
117
+ List of CodeBlock instances that pass all filters
118
+ """
119
+ blocks = []
120
+ for hash_val, start_line, end_line, snippet in windows:
97
121
  block = CodeBlock(
98
122
  file_path=file_path,
99
123
  start_line=start_line,
@@ -101,13 +125,8 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
101
125
  snippet=snippet,
102
126
  hash_value=hash_val,
103
127
  )
104
-
105
- # Apply extensible filters (keyword arguments, imports, etc.)
106
- if self._filter_registry.should_filter_block(block, content):
107
- continue
108
-
109
- blocks.append(block)
110
-
128
+ if not self._filter_registry.should_filter_block(block, content):
129
+ blocks.append(block)
111
130
  return blocks
112
131
 
113
132
  def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
@@ -188,26 +207,44 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
188
207
  lines_with_numbers = []
189
208
  in_multiline_import = False
190
209
 
191
- for line_num, line in enumerate(content.split("\n"), start=1):
192
- # Skip JSDoc comment lines
193
- if line_num in jsdoc_lines:
194
- continue
195
-
196
- line = self._hasher._normalize_line(line) # pylint: disable=protected-access
197
- if not line:
198
- continue
199
-
200
- # Update multi-line import state and check if line should be skipped
201
- in_multiline_import, should_skip = self._hasher._should_skip_import_line( # pylint: disable=protected-access
210
+ # Skip JSDoc comment lines
211
+ non_jsdoc_lines = (
212
+ (line_num, line)
213
+ for line_num, line in enumerate(content.split("\n"), start=1)
214
+ if line_num not in jsdoc_lines
215
+ )
216
+ for line_num, line in non_jsdoc_lines:
217
+ in_multiline_import, normalized = self._normalize_and_filter_line(
202
218
  line, in_multiline_import
203
219
  )
204
- if should_skip:
205
- continue
206
-
207
- lines_with_numbers.append((line_num, line))
220
+ if normalized is not None:
221
+ lines_with_numbers.append((line_num, normalized))
208
222
 
209
223
  return lines_with_numbers
210
224
 
225
+ def _normalize_and_filter_line(
226
+ self, line: str, in_multiline_import: bool
227
+ ) -> tuple[bool, str | None]:
228
+ """Normalize line and check if it should be included.
229
+
230
+ Args:
231
+ line: Raw source line
232
+ in_multiline_import: Current multi-line import state
233
+
234
+ Returns:
235
+ Tuple of (new_import_state, normalized_line or None if should skip)
236
+ """
237
+ normalized = token_hasher.normalize_line(line)
238
+ if not normalized:
239
+ return in_multiline_import, None
240
+
241
+ new_state, should_skip = token_hasher.should_skip_import_line(
242
+ normalized, in_multiline_import
243
+ )
244
+ if should_skip:
245
+ return new_state, None
246
+ return new_state, normalized
247
+
211
248
  def _rolling_hash_with_tracking(
212
249
  self, lines_with_numbers: list[tuple[int, str]], window_size: int
213
250
  ) -> list[tuple[int, int, int, str]]:
@@ -239,354 +276,3 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
239
276
  hashes.append((hash_val, start_line, end_line, snippet))
240
277
 
241
278
  return hashes
242
-
243
- def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
244
- """Filter out blocks that overlap with interface/type definitions.
245
-
246
- Args:
247
- content: File content
248
- start_line: Block start line
249
- end_line: Block end line
250
-
251
- Returns:
252
- False if block overlaps interface definition, True otherwise
253
- """
254
- interface_ranges = self._find_interface_ranges(content)
255
- return not self._overlaps_interface(start_line, end_line, interface_ranges)
256
-
257
- def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
258
- """Check if a line range in the original source is a single logical statement.
259
-
260
- Uses tree-sitter AST analysis to detect patterns like:
261
- - Decorators (@Component(...))
262
- - Function call arguments
263
- - Object literal properties
264
- - Class field definitions
265
- - Type assertions
266
- - Chained method calls (single expression)
267
-
268
- Args:
269
- content: TypeScript source code
270
- start_line: Starting line number (1-indexed)
271
- end_line: Ending line number (1-indexed)
272
-
273
- Returns:
274
- True if this range represents a single logical statement/expression
275
- """
276
- if not TREE_SITTER_AVAILABLE:
277
- return False
278
-
279
- from src.analyzers.typescript_base import TypeScriptBaseAnalyzer
280
-
281
- analyzer = TypeScriptBaseAnalyzer()
282
- root = analyzer.parse_typescript(content)
283
- if not root:
284
- return False
285
-
286
- return self._check_overlapping_nodes(root, start_line, end_line)
287
-
288
- def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
289
- """Check if any AST node overlaps and matches single-statement pattern.
290
-
291
- Args:
292
- root: Root tree-sitter node
293
- start_line: Starting line number (1-indexed)
294
- end_line: Ending line number (1-indexed)
295
-
296
- Returns:
297
- True if any node matches single-statement pattern
298
- """
299
- # Convert to 0-indexed for tree-sitter
300
- ts_start = start_line - 1
301
- ts_end = end_line - 1
302
-
303
- for node in self._walk_nodes(root):
304
- if self._node_overlaps_and_matches(node, ts_start, ts_end):
305
- return True
306
- return False
307
-
308
- def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
309
- """Generator to walk all nodes in tree.
310
-
311
- Args:
312
- node: Starting node
313
-
314
- Yields:
315
- All nodes in tree
316
- """
317
- yield node
318
- for child in node.children:
319
- yield from self._walk_nodes(child)
320
-
321
- def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
322
- """Check if node overlaps with range and matches single-statement pattern.
323
-
324
- Args:
325
- node: Tree-sitter node
326
- ts_start: Starting line (0-indexed)
327
- ts_end: Ending line (0-indexed)
328
-
329
- Returns:
330
- True if node overlaps and matches pattern
331
- """
332
- node_start = node.start_point[0]
333
- node_end = node.end_point[0]
334
-
335
- # Check if ranges overlap
336
- overlaps = not (node_end < ts_start or node_start > ts_end)
337
- if not overlaps:
338
- return False
339
-
340
- return self._is_single_statement_pattern(node, ts_start, ts_end)
341
-
342
- def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
343
- """Check if node is a simple container pattern (decorator, object, etc.).
344
-
345
- Args:
346
- node: AST node to check
347
- contains: Whether node contains the range
348
-
349
- Returns:
350
- True if node matches simple container pattern
351
- """
352
- simple_types = (
353
- "decorator",
354
- "object",
355
- "member_expression",
356
- "as_expression",
357
- "array_pattern",
358
- )
359
- return node.type in simple_types and contains
360
-
361
- def _matches_call_expression_pattern(
362
- self, node: Node, ts_start: int, ts_end: int, contains: bool
363
- ) -> bool:
364
- """Check if node is a call expression pattern.
365
-
366
- Args:
367
- node: AST node to check
368
- ts_start: Starting line (0-indexed)
369
- ts_end: Ending line (0-indexed)
370
- contains: Whether node contains the range
371
-
372
- Returns:
373
- True if node matches call expression pattern
374
- """
375
- if node.type != "call_expression":
376
- return False
377
-
378
- # Check if this is a multi-line call containing the range
379
- node_start = node.start_point[0]
380
- node_end = node.end_point[0]
381
- is_multiline = node_start < node_end
382
- if is_multiline and node_start <= ts_start <= node_end:
383
- return True
384
-
385
- return contains
386
-
387
- def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
388
- """Check if node is a lexical declaration pattern.
389
-
390
- Args:
391
- node: AST node to check
392
- contains: Whether node contains the range
393
-
394
- Returns:
395
- True if node matches declaration pattern (excluding function bodies)
396
- """
397
- if node.type != "lexical_declaration" or not contains:
398
- return False
399
-
400
- # Only filter if simple value assignment, NOT a function body
401
- if self._contains_function_body(node):
402
- return False
403
-
404
- return True
405
-
406
- def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
407
- """Check if node is a JSX element pattern.
408
-
409
- Args:
410
- node: AST node to check
411
- contains: Whether node contains the range
412
-
413
- Returns:
414
- True if node matches JSX pattern
415
- """
416
- jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
417
- return node.type in jsx_types and contains
418
-
419
- def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
420
- """Check if node is a class body field definition pattern.
421
-
422
- Args:
423
- node: AST node to check
424
- ts_start: Starting line (0-indexed)
425
- ts_end: Ending line (0-indexed)
426
-
427
- Returns:
428
- True if node is class body with field definitions
429
- """
430
- if node.type != "class_body":
431
- return False
432
-
433
- return self._is_in_class_field_area(node, ts_start, ts_end)
434
-
435
- def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
436
- """Check if an AST node represents a single-statement pattern to filter.
437
-
438
- Delegates to specialized pattern matchers for different AST node categories.
439
-
440
- Args:
441
- node: AST node that overlaps with the line range
442
- ts_start: Starting line number (0-indexed)
443
- ts_end: Ending line number (0-indexed)
444
-
445
- Returns:
446
- True if this node represents a single logical statement pattern
447
- """
448
- node_start = node.start_point[0]
449
- node_end = node.end_point[0]
450
- contains = (node_start <= ts_start) and (node_end >= ts_end)
451
-
452
- # Check pattern categories using specialized helpers - use list for any()
453
- matchers = [
454
- self._matches_simple_container_pattern(node, contains),
455
- self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
456
- self._matches_declaration_pattern(node, contains),
457
- self._matches_jsx_pattern(node, contains),
458
- self._matches_class_body_pattern(node, ts_start, ts_end),
459
- ]
460
- return any(matchers)
461
-
462
- def _contains_function_body(self, node: Node) -> bool:
463
- """Check if node contains an arrow function or function expression.
464
-
465
- Args:
466
- node: Node to check
467
-
468
- Returns:
469
- True if node contains a function with a body
470
- """
471
- for child in node.children:
472
- if child.type in ("arrow_function", "function", "function_expression"):
473
- return True
474
- if self._contains_function_body(child):
475
- return True
476
- return False
477
-
478
- def _find_first_method_line(self, class_body: Node) -> int | None:
479
- """Find line number of first method in class body.
480
-
481
- Args:
482
- class_body: Class body node
483
-
484
- Returns:
485
- Line number of first method or None if no methods
486
- """
487
- for child in class_body.children:
488
- if child.type in ("method_definition", "function_declaration"):
489
- return child.start_point[0]
490
- return None
491
-
492
- def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
493
- """Check if range is in class field definition area (before methods).
494
-
495
- Args:
496
- class_body: Class body node
497
- ts_start: Starting line (0-indexed)
498
- ts_end: Ending line (0-indexed)
499
-
500
- Returns:
501
- True if range is in field area
502
- """
503
- first_method_line = self._find_first_method_line(class_body)
504
- class_start = class_body.start_point[0]
505
- class_end = class_body.end_point[0]
506
-
507
- # No methods: check if range is in class body
508
- if first_method_line is None:
509
- return class_start <= ts_start and class_end >= ts_end
510
-
511
- # Has methods: check if range is before first method
512
- return class_start <= ts_start and ts_end < first_method_line
513
-
514
- def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
515
- """Find line ranges of interface/type definitions.
516
-
517
- Args:
518
- content: File content
519
-
520
- Returns:
521
- List of (start_line, end_line) tuples for interface blocks
522
- """
523
- ranges: list[tuple[int, int]] = []
524
- lines = content.split("\n")
525
- state = {"in_interface": False, "start_line": 0, "brace_count": 0}
526
-
527
- for i, line in enumerate(lines, start=1):
528
- stripped = line.strip()
529
- self._process_line_for_interface(stripped, i, state, ranges)
530
-
531
- return ranges
532
-
533
- def _process_line_for_interface(
534
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
535
- ) -> None:
536
- """Process single line for interface detection.
537
-
538
- Args:
539
- stripped: Stripped line content
540
- line_num: Line number
541
- state: Tracking state (in_interface, start_line, brace_count)
542
- ranges: Accumulated interface ranges
543
- """
544
- if self._is_interface_start(stripped):
545
- self._handle_interface_start(stripped, line_num, state, ranges)
546
- return
547
-
548
- if state["in_interface"]:
549
- self._handle_interface_continuation(stripped, line_num, state, ranges)
550
-
551
- def _is_interface_start(self, stripped: str) -> bool:
552
- """Check if line starts interface/type definition."""
553
- return stripped.startswith(("interface ", "type ")) and "{" in stripped
554
-
555
- def _handle_interface_start(
556
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
557
- ) -> None:
558
- """Handle start of interface definition."""
559
- state["in_interface"] = True
560
- state["start_line"] = line_num
561
- state["brace_count"] = stripped.count("{") - stripped.count("}")
562
-
563
- if state["brace_count"] == 0: # Single-line interface
564
- ranges.append((line_num, line_num))
565
- state["in_interface"] = False
566
-
567
- def _handle_interface_continuation(
568
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
569
- ) -> None:
570
- """Handle continuation of interface definition."""
571
- state["brace_count"] += stripped.count("{") - stripped.count("}")
572
- if state["brace_count"] == 0:
573
- ranges.append((state["start_line"], line_num))
574
- state["in_interface"] = False
575
-
576
- def _overlaps_interface(
577
- self, start: int, end: int, interface_ranges: list[tuple[int, int]]
578
- ) -> bool:
579
- """Check if block overlaps with any interface range.
580
-
581
- Args:
582
- start: Block start line
583
- end: Block end line
584
- interface_ranges: List of interface definition ranges
585
-
586
- Returns:
587
- True if block overlaps with an interface
588
- """
589
- for if_start, if_end in interface_ranges:
590
- if start <= if_end and end >= if_start:
591
- return True
592
- return False
@@ -0,0 +1,138 @@
1
+ """
2
+ Purpose: Extract TypeScript module-level constants using tree-sitter parsing
3
+
4
+ Scope: TypeScript constant extraction for duplicate constants detection
5
+
6
+ Overview: Extracts module-level constant definitions from TypeScript source code using tree-sitter.
7
+ Identifies constants as top-level `const` declarations where the variable name matches the
8
+ UPPER_SNAKE_CASE naming convention (e.g., const API_TIMEOUT = 30). Excludes non-const
9
+ declarations (let, var), class-level constants, and function-level constants to focus on
10
+ public module constants that should be consolidated across files.
11
+
12
+ Dependencies: tree-sitter, tree-sitter-typescript, re for pattern matching, ConstantInfo,
13
+ TypeScriptValueExtractor
14
+
15
+ Exports: TypeScriptConstantExtractor class
16
+
17
+ Interfaces: TypeScriptConstantExtractor.extract(content: str) -> list[ConstantInfo]
18
+
19
+ Implementation: Tree-sitter-based parsing with const declaration filtering and ALL_CAPS regex matching
20
+
21
+ Suppressions:
22
+ - type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
23
+ - broad-exception-caught: Defensive parsing for malformed TypeScript code
24
+ """
25
+
26
+ from typing import Any
27
+
28
+ from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE, TS_PARSER
29
+
30
+ from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
31
+ from .typescript_value_extractor import TypeScriptValueExtractor
32
+
33
+ if TREE_SITTER_AVAILABLE:
34
+ from tree_sitter import Node
35
+ else:
36
+ Node = Any # type: ignore[assignment,misc]
37
+
38
+ # Node types that represent values
39
+ VALUE_TYPES = frozenset(
40
+ (
41
+ "number",
42
+ "string",
43
+ "true",
44
+ "false",
45
+ "null",
46
+ "identifier",
47
+ "array",
48
+ "object",
49
+ "call_expression",
50
+ )
51
+ )
52
+
53
+
54
+ class TypeScriptConstantExtractor:
55
+ """Extracts module-level constants from TypeScript source code."""
56
+
57
+ def __init__(self) -> None:
58
+ """Initialize the TypeScript constant extractor."""
59
+ self.tree_sitter_available = TREE_SITTER_AVAILABLE
60
+ self._value_extractor = TypeScriptValueExtractor()
61
+
62
+ def extract(self, content: str) -> list[ConstantInfo]:
63
+ """Extract constants from TypeScript source code."""
64
+ root = _parse_content(content)
65
+ if root is None:
66
+ return []
67
+ constants: list[ConstantInfo] = []
68
+ for child in root.children:
69
+ constants.extend(self._extract_from_node(child, content))
70
+ return constants
71
+
72
+ def _extract_from_node(self, node: Node, content: str) -> list[ConstantInfo]:
73
+ """Extract constants from a single AST node."""
74
+ if node.type == "lexical_declaration":
75
+ return self._extract_from_lexical_declaration(node, content)
76
+ if node.type == "export_statement":
77
+ return self._extract_from_export(node, content)
78
+ return []
79
+
80
+ def _extract_from_lexical_declaration(self, node: Node, content: str) -> list[ConstantInfo]:
81
+ """Extract constants from a lexical declaration."""
82
+ if not _is_const_declaration(node):
83
+ return []
84
+ return [
85
+ info
86
+ for c in node.children
87
+ if c.type == "variable_declarator"
88
+ and (info := self._extract_from_declarator(c, content))
89
+ ]
90
+
91
+ def _extract_from_export(self, node: Node, content: str) -> list[ConstantInfo]:
92
+ """Extract constants from an export statement."""
93
+ for child in node.children:
94
+ if child.type == "lexical_declaration":
95
+ return self._extract_from_lexical_declaration(child, content)
96
+ return []
97
+
98
+ def _extract_from_declarator(self, node: Node, content: str) -> ConstantInfo | None:
99
+ """Extract constant info from a variable declarator."""
100
+ name, value = _get_name_and_value(node, content, self._value_extractor)
101
+ if not name or not _is_constant_name(name):
102
+ return None
103
+ return ConstantInfo(name=name, line_number=node.start_point[0] + 1, value=value)
104
+
105
+
106
+ def _parse_content(content: str) -> Node | None:
107
+ """Parse content and return root node, or None on failure."""
108
+ if not TREE_SITTER_AVAILABLE or TS_PARSER is None:
109
+ return None
110
+ try:
111
+ return TS_PARSER.parse(bytes(content, "utf8")).root_node
112
+ except Exception: # pylint: disable=broad-exception-caught
113
+ return None
114
+
115
+
116
+ def _is_const_declaration(node: Node) -> bool:
117
+ """Check if lexical declaration is a const."""
118
+ return any(child.type == "const" for child in node.children)
119
+
120
+
121
+ def _get_name_and_value(
122
+ node: Node, content: str, extractor: TypeScriptValueExtractor
123
+ ) -> tuple[str | None, str | None]:
124
+ """Extract name and value from declarator node."""
125
+ name = next(
126
+ (extractor.get_node_text(c, content) for c in node.children if c.type == "identifier"),
127
+ None,
128
+ )
129
+ value = next(
130
+ (extractor.get_value_string(c, content) for c in node.children if c.type in VALUE_TYPES),
131
+ None,
132
+ )
133
+ return name, value
134
+
135
+
136
+ def _is_constant_name(name: str) -> bool:
137
+ """Check if name matches constant naming convention."""
138
+ return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))