thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +44 -27
  23. src/core/base.py +95 -5
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +36 -6
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +125 -22
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +142 -94
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +68 -21
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +20 -82
  73. src/linters/dry/file_analyzer.py +15 -50
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +182 -54
  76. src/linters/dry/python_analyzer.py +108 -336
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/storage_initializer.py +9 -18
  80. src/linters/dry/token_hasher.py +129 -71
  81. src/linters/dry/typescript_analyzer.py +68 -380
  82. src/linters/dry/typescript_constant_extractor.py +138 -0
  83. src/linters/dry/typescript_statement_detector.py +255 -0
  84. src/linters/dry/typescript_value_extractor.py +70 -0
  85. src/linters/dry/violation_builder.py +4 -0
  86. src/linters/dry/violation_filter.py +9 -5
  87. src/linters/dry/violation_generator.py +71 -14
  88. src/linters/file_header/__init__.py +24 -0
  89. src/linters/file_header/atemporal_detector.py +105 -0
  90. src/linters/file_header/base_parser.py +93 -0
  91. src/linters/file_header/bash_parser.py +66 -0
  92. src/linters/file_header/config.py +140 -0
  93. src/linters/file_header/css_parser.py +70 -0
  94. src/linters/file_header/field_validator.py +72 -0
  95. src/linters/file_header/linter.py +309 -0
  96. src/linters/file_header/markdown_parser.py +130 -0
  97. src/linters/file_header/python_parser.py +42 -0
  98. src/linters/file_header/typescript_parser.py +73 -0
  99. src/linters/file_header/violation_builder.py +79 -0
  100. src/linters/file_placement/config_loader.py +3 -1
  101. src/linters/file_placement/directory_matcher.py +4 -0
  102. src/linters/file_placement/linter.py +74 -31
  103. src/linters/file_placement/pattern_matcher.py +41 -6
  104. src/linters/file_placement/pattern_validator.py +31 -12
  105. src/linters/file_placement/rule_checker.py +12 -7
  106. src/linters/lazy_ignores/__init__.py +43 -0
  107. src/linters/lazy_ignores/config.py +74 -0
  108. src/linters/lazy_ignores/directive_utils.py +164 -0
  109. src/linters/lazy_ignores/header_parser.py +177 -0
  110. src/linters/lazy_ignores/linter.py +158 -0
  111. src/linters/lazy_ignores/matcher.py +168 -0
  112. src/linters/lazy_ignores/python_analyzer.py +209 -0
  113. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  114. src/linters/lazy_ignores/skip_detector.py +298 -0
  115. src/linters/lazy_ignores/types.py +71 -0
  116. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  117. src/linters/lazy_ignores/violation_builder.py +135 -0
  118. src/linters/lbyl/__init__.py +31 -0
  119. src/linters/lbyl/config.py +63 -0
  120. src/linters/lbyl/linter.py +67 -0
  121. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  122. src/linters/lbyl/pattern_detectors/base.py +63 -0
  123. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  124. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  125. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  126. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  127. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  128. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  129. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  130. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  131. src/linters/lbyl/python_analyzer.py +215 -0
  132. src/linters/lbyl/violation_builder.py +354 -0
  133. src/linters/magic_numbers/__init__.py +48 -0
  134. src/linters/magic_numbers/config.py +82 -0
  135. src/linters/magic_numbers/context_analyzer.py +249 -0
  136. src/linters/magic_numbers/linter.py +462 -0
  137. src/linters/magic_numbers/python_analyzer.py +64 -0
  138. src/linters/magic_numbers/typescript_analyzer.py +215 -0
  139. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  140. src/linters/magic_numbers/violation_builder.py +98 -0
  141. src/linters/method_property/__init__.py +49 -0
  142. src/linters/method_property/config.py +138 -0
  143. src/linters/method_property/linter.py +414 -0
  144. src/linters/method_property/python_analyzer.py +473 -0
  145. src/linters/method_property/violation_builder.py +119 -0
  146. src/linters/nesting/__init__.py +6 -2
  147. src/linters/nesting/config.py +6 -3
  148. src/linters/nesting/linter.py +31 -34
  149. src/linters/nesting/python_analyzer.py +4 -0
  150. src/linters/nesting/typescript_analyzer.py +6 -11
  151. src/linters/nesting/violation_builder.py +1 -0
  152. src/linters/performance/__init__.py +91 -0
  153. src/linters/performance/config.py +43 -0
  154. src/linters/performance/constants.py +49 -0
  155. src/linters/performance/linter.py +149 -0
  156. src/linters/performance/python_analyzer.py +365 -0
  157. src/linters/performance/regex_analyzer.py +312 -0
  158. src/linters/performance/regex_linter.py +139 -0
  159. src/linters/performance/typescript_analyzer.py +236 -0
  160. src/linters/performance/violation_builder.py +160 -0
  161. src/linters/print_statements/__init__.py +53 -0
  162. src/linters/print_statements/config.py +78 -0
  163. src/linters/print_statements/linter.py +413 -0
  164. src/linters/print_statements/python_analyzer.py +153 -0
  165. src/linters/print_statements/typescript_analyzer.py +125 -0
  166. src/linters/print_statements/violation_builder.py +96 -0
  167. src/linters/srp/__init__.py +3 -3
  168. src/linters/srp/class_analyzer.py +11 -7
  169. src/linters/srp/config.py +12 -6
  170. src/linters/srp/heuristics.py +56 -22
  171. src/linters/srp/linter.py +47 -39
  172. src/linters/srp/python_analyzer.py +55 -20
  173. src/linters/srp/typescript_metrics_calculator.py +110 -50
  174. src/linters/stateless_class/__init__.py +25 -0
  175. src/linters/stateless_class/config.py +58 -0
  176. src/linters/stateless_class/linter.py +349 -0
  177. src/linters/stateless_class/python_analyzer.py +290 -0
  178. src/linters/stringly_typed/__init__.py +36 -0
  179. src/linters/stringly_typed/config.py +189 -0
  180. src/linters/stringly_typed/context_filter.py +451 -0
  181. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  182. src/linters/stringly_typed/ignore_checker.py +100 -0
  183. src/linters/stringly_typed/ignore_utils.py +51 -0
  184. src/linters/stringly_typed/linter.py +376 -0
  185. src/linters/stringly_typed/python/__init__.py +33 -0
  186. src/linters/stringly_typed/python/analyzer.py +348 -0
  187. src/linters/stringly_typed/python/call_tracker.py +175 -0
  188. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  189. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  190. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  191. src/linters/stringly_typed/python/constants.py +21 -0
  192. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  193. src/linters/stringly_typed/python/validation_detector.py +189 -0
  194. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  195. src/linters/stringly_typed/storage.py +620 -0
  196. src/linters/stringly_typed/storage_initializer.py +45 -0
  197. src/linters/stringly_typed/typescript/__init__.py +28 -0
  198. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  199. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  200. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  201. src/linters/stringly_typed/violation_generator.py +419 -0
  202. src/orchestrator/core.py +264 -16
  203. src/orchestrator/language_detector.py +5 -3
  204. src/templates/thailint_config_template.yaml +354 -0
  205. src/utils/project_root.py +138 -16
  206. thailint-0.15.3.dist-info/METADATA +187 -0
  207. thailint-0.15.3.dist-info/RECORD +226 -0
  208. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
  209. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  210. src/cli.py +0 -1055
  211. thailint-0.2.0.dist-info/METADATA +0 -980
  212. thailint-0.2.0.dist-info/RECORD +0 -75
  213. thailint-0.2.0.dist-info/entry_points.txt +0 -4
  214. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
@@ -19,6 +19,11 @@ Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, c
19
19
  Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
20
20
  single statement detection using tree-sitter AST patterns, and interface filtering logic
21
21
 
22
+ Suppressions:
23
+ - type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
24
+ - invalid-name: Node type alias follows tree-sitter naming convention
25
+ - srp.violation: Complex tree-sitter AST analysis algorithm. See SRP Exception below.
26
+
22
27
  SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
23
28
  Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
24
29
  false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
@@ -30,15 +35,17 @@ SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds
30
35
  responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
31
36
  """
32
37
 
33
- from collections.abc import Generator
38
+ from collections.abc import Iterable
34
39
  from pathlib import Path
35
40
 
36
41
  from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
37
42
 
43
+ from . import token_hasher
38
44
  from .base_token_analyzer import BaseTokenAnalyzer
39
45
  from .block_filter import BlockFilterRegistry, create_default_registry
40
46
  from .cache import CodeBlock
41
47
  from .config import DRYConfig
48
+ from .typescript_statement_detector import is_single_statement, should_include_block
42
49
 
43
50
  if TREE_SITTER_AVAILABLE:
44
51
  from tree_sitter import Node
@@ -84,16 +91,33 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
84
91
  # Generate rolling hash windows
85
92
  windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
86
93
 
87
- blocks = []
88
- for hash_val, start_line, end_line, snippet in windows:
89
- # Filter interface/type definitions
90
- if not self._should_include_block(content, start_line, end_line):
91
- continue
94
+ # Filter out interface/type definitions and single statement patterns
95
+ valid_windows = (
96
+ (hash_val, start_line, end_line, snippet)
97
+ for hash_val, start_line, end_line, snippet in windows
98
+ if should_include_block(content, start_line, end_line)
99
+ and not is_single_statement(content, start_line, end_line)
100
+ )
101
+ return self._build_blocks(valid_windows, file_path, content)
102
+
103
+ def _build_blocks(
104
+ self,
105
+ windows: Iterable[tuple[int, int, int, str]],
106
+ file_path: Path,
107
+ content: str,
108
+ ) -> list[CodeBlock]:
109
+ """Build CodeBlock objects from valid windows, applying filters.
92
110
 
93
- # Filter single statement patterns
94
- if self._is_single_statement_in_source(content, start_line, end_line):
95
- continue
111
+ Args:
112
+ windows: Iterable of (hash_val, start_line, end_line, snippet) tuples
113
+ file_path: Path to source file
114
+ content: File content
96
115
 
116
+ Returns:
117
+ List of CodeBlock instances that pass all filters
118
+ """
119
+ blocks = []
120
+ for hash_val, start_line, end_line, snippet in windows:
97
121
  block = CodeBlock(
98
122
  file_path=file_path,
99
123
  start_line=start_line,
@@ -101,13 +125,8 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
101
125
  snippet=snippet,
102
126
  hash_value=hash_val,
103
127
  )
104
-
105
- # Apply extensible filters (keyword arguments, imports, etc.)
106
- if self._filter_registry.should_filter_block(block, content):
107
- continue
108
-
109
- blocks.append(block)
110
-
128
+ if not self._filter_registry.should_filter_block(block, content):
129
+ blocks.append(block)
111
130
  return blocks
112
131
 
113
132
  def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
@@ -186,25 +205,45 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
186
205
  List of (original_line_number, normalized_code) tuples
187
206
  """
188
207
  lines_with_numbers = []
208
+ in_multiline_import = False
189
209
 
190
- for line_num, line in enumerate(content.split("\n"), start=1):
191
- # Skip JSDoc comment lines
192
- if line_num in jsdoc_lines:
193
- continue
210
+ # Skip JSDoc comment lines
211
+ non_jsdoc_lines = (
212
+ (line_num, line)
213
+ for line_num, line in enumerate(content.split("\n"), start=1)
214
+ if line_num not in jsdoc_lines
215
+ )
216
+ for line_num, line in non_jsdoc_lines:
217
+ in_multiline_import, normalized = self._normalize_and_filter_line(
218
+ line, in_multiline_import
219
+ )
220
+ if normalized is not None:
221
+ lines_with_numbers.append((line_num, normalized))
194
222
 
195
- # Use hasher's existing tokenization logic
196
- line = self._hasher._strip_comments(line) # pylint: disable=protected-access
197
- line = " ".join(line.split())
223
+ return lines_with_numbers
198
224
 
199
- if not line:
200
- continue
225
+ def _normalize_and_filter_line(
226
+ self, line: str, in_multiline_import: bool
227
+ ) -> tuple[bool, str | None]:
228
+ """Normalize line and check if it should be included.
201
229
 
202
- if self._hasher._is_import_statement(line): # pylint: disable=protected-access
203
- continue
230
+ Args:
231
+ line: Raw source line
232
+ in_multiline_import: Current multi-line import state
204
233
 
205
- lines_with_numbers.append((line_num, line))
234
+ Returns:
235
+ Tuple of (new_import_state, normalized_line or None if should skip)
236
+ """
237
+ normalized = token_hasher.normalize_line(line)
238
+ if not normalized:
239
+ return in_multiline_import, None
206
240
 
207
- return lines_with_numbers
241
+ new_state, should_skip = token_hasher.should_skip_import_line(
242
+ normalized, in_multiline_import
243
+ )
244
+ if should_skip:
245
+ return new_state, None
246
+ return new_state, normalized
208
247
 
209
248
  def _rolling_hash_with_tracking(
210
249
  self, lines_with_numbers: list[tuple[int, str]], window_size: int
@@ -237,354 +276,3 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
237
276
  hashes.append((hash_val, start_line, end_line, snippet))
238
277
 
239
278
  return hashes
240
-
241
- def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
242
- """Filter out blocks that overlap with interface/type definitions.
243
-
244
- Args:
245
- content: File content
246
- start_line: Block start line
247
- end_line: Block end line
248
-
249
- Returns:
250
- False if block overlaps interface definition, True otherwise
251
- """
252
- interface_ranges = self._find_interface_ranges(content)
253
- return not self._overlaps_interface(start_line, end_line, interface_ranges)
254
-
255
- def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
256
- """Check if a line range in the original source is a single logical statement.
257
-
258
- Uses tree-sitter AST analysis to detect patterns like:
259
- - Decorators (@Component(...))
260
- - Function call arguments
261
- - Object literal properties
262
- - Class field definitions
263
- - Type assertions
264
- - Chained method calls (single expression)
265
-
266
- Args:
267
- content: TypeScript source code
268
- start_line: Starting line number (1-indexed)
269
- end_line: Ending line number (1-indexed)
270
-
271
- Returns:
272
- True if this range represents a single logical statement/expression
273
- """
274
- if not TREE_SITTER_AVAILABLE:
275
- return False
276
-
277
- from src.analyzers.typescript_base import TypeScriptBaseAnalyzer
278
-
279
- analyzer = TypeScriptBaseAnalyzer()
280
- root = analyzer.parse_typescript(content)
281
- if not root:
282
- return False
283
-
284
- return self._check_overlapping_nodes(root, start_line, end_line)
285
-
286
- def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
287
- """Check if any AST node overlaps and matches single-statement pattern.
288
-
289
- Args:
290
- root: Root tree-sitter node
291
- start_line: Starting line number (1-indexed)
292
- end_line: Ending line number (1-indexed)
293
-
294
- Returns:
295
- True if any node matches single-statement pattern
296
- """
297
- # Convert to 0-indexed for tree-sitter
298
- ts_start = start_line - 1
299
- ts_end = end_line - 1
300
-
301
- for node in self._walk_nodes(root):
302
- if self._node_overlaps_and_matches(node, ts_start, ts_end):
303
- return True
304
- return False
305
-
306
- def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
307
- """Generator to walk all nodes in tree.
308
-
309
- Args:
310
- node: Starting node
311
-
312
- Yields:
313
- All nodes in tree
314
- """
315
- yield node
316
- for child in node.children:
317
- yield from self._walk_nodes(child)
318
-
319
- def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
320
- """Check if node overlaps with range and matches single-statement pattern.
321
-
322
- Args:
323
- node: Tree-sitter node
324
- ts_start: Starting line (0-indexed)
325
- ts_end: Ending line (0-indexed)
326
-
327
- Returns:
328
- True if node overlaps and matches pattern
329
- """
330
- node_start = node.start_point[0]
331
- node_end = node.end_point[0]
332
-
333
- # Check if ranges overlap
334
- overlaps = not (node_end < ts_start or node_start > ts_end)
335
- if not overlaps:
336
- return False
337
-
338
- return self._is_single_statement_pattern(node, ts_start, ts_end)
339
-
340
- def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
341
- """Check if node is a simple container pattern (decorator, object, etc.).
342
-
343
- Args:
344
- node: AST node to check
345
- contains: Whether node contains the range
346
-
347
- Returns:
348
- True if node matches simple container pattern
349
- """
350
- simple_types = (
351
- "decorator",
352
- "object",
353
- "member_expression",
354
- "as_expression",
355
- "array_pattern",
356
- )
357
- return node.type in simple_types and contains
358
-
359
- def _matches_call_expression_pattern(
360
- self, node: Node, ts_start: int, ts_end: int, contains: bool
361
- ) -> bool:
362
- """Check if node is a call expression pattern.
363
-
364
- Args:
365
- node: AST node to check
366
- ts_start: Starting line (0-indexed)
367
- ts_end: Ending line (0-indexed)
368
- contains: Whether node contains the range
369
-
370
- Returns:
371
- True if node matches call expression pattern
372
- """
373
- if node.type != "call_expression":
374
- return False
375
-
376
- # Check if this is a multi-line call containing the range
377
- node_start = node.start_point[0]
378
- node_end = node.end_point[0]
379
- is_multiline = node_start < node_end
380
- if is_multiline and node_start <= ts_start <= node_end:
381
- return True
382
-
383
- return contains
384
-
385
- def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
386
- """Check if node is a lexical declaration pattern.
387
-
388
- Args:
389
- node: AST node to check
390
- contains: Whether node contains the range
391
-
392
- Returns:
393
- True if node matches declaration pattern (excluding function bodies)
394
- """
395
- if node.type != "lexical_declaration" or not contains:
396
- return False
397
-
398
- # Only filter if simple value assignment, NOT a function body
399
- if self._contains_function_body(node):
400
- return False
401
-
402
- return True
403
-
404
- def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
405
- """Check if node is a JSX element pattern.
406
-
407
- Args:
408
- node: AST node to check
409
- contains: Whether node contains the range
410
-
411
- Returns:
412
- True if node matches JSX pattern
413
- """
414
- jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
415
- return node.type in jsx_types and contains
416
-
417
- def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
418
- """Check if node is a class body field definition pattern.
419
-
420
- Args:
421
- node: AST node to check
422
- ts_start: Starting line (0-indexed)
423
- ts_end: Ending line (0-indexed)
424
-
425
- Returns:
426
- True if node is class body with field definitions
427
- """
428
- if node.type != "class_body":
429
- return False
430
-
431
- return self._is_in_class_field_area(node, ts_start, ts_end)
432
-
433
- def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
434
- """Check if an AST node represents a single-statement pattern to filter.
435
-
436
- Delegates to specialized pattern matchers for different AST node categories.
437
-
438
- Args:
439
- node: AST node that overlaps with the line range
440
- ts_start: Starting line number (0-indexed)
441
- ts_end: Ending line number (0-indexed)
442
-
443
- Returns:
444
- True if this node represents a single logical statement pattern
445
- """
446
- node_start = node.start_point[0]
447
- node_end = node.end_point[0]
448
- contains = (node_start <= ts_start) and (node_end >= ts_end)
449
-
450
- # Check pattern categories using specialized helpers - use list for any()
451
- matchers = [
452
- self._matches_simple_container_pattern(node, contains),
453
- self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
454
- self._matches_declaration_pattern(node, contains),
455
- self._matches_jsx_pattern(node, contains),
456
- self._matches_class_body_pattern(node, ts_start, ts_end),
457
- ]
458
- return any(matchers)
459
-
460
- def _contains_function_body(self, node: Node) -> bool:
461
- """Check if node contains an arrow function or function expression.
462
-
463
- Args:
464
- node: Node to check
465
-
466
- Returns:
467
- True if node contains a function with a body
468
- """
469
- for child in node.children:
470
- if child.type in ("arrow_function", "function", "function_expression"):
471
- return True
472
- if self._contains_function_body(child):
473
- return True
474
- return False
475
-
476
- def _find_first_method_line(self, class_body: Node) -> int | None:
477
- """Find line number of first method in class body.
478
-
479
- Args:
480
- class_body: Class body node
481
-
482
- Returns:
483
- Line number of first method or None if no methods
484
- """
485
- for child in class_body.children:
486
- if child.type in ("method_definition", "function_declaration"):
487
- return child.start_point[0]
488
- return None
489
-
490
- def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
491
- """Check if range is in class field definition area (before methods).
492
-
493
- Args:
494
- class_body: Class body node
495
- ts_start: Starting line (0-indexed)
496
- ts_end: Ending line (0-indexed)
497
-
498
- Returns:
499
- True if range is in field area
500
- """
501
- first_method_line = self._find_first_method_line(class_body)
502
- class_start = class_body.start_point[0]
503
- class_end = class_body.end_point[0]
504
-
505
- # No methods: check if range is in class body
506
- if first_method_line is None:
507
- return class_start <= ts_start and class_end >= ts_end
508
-
509
- # Has methods: check if range is before first method
510
- return class_start <= ts_start and ts_end < first_method_line
511
-
512
- def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
513
- """Find line ranges of interface/type definitions.
514
-
515
- Args:
516
- content: File content
517
-
518
- Returns:
519
- List of (start_line, end_line) tuples for interface blocks
520
- """
521
- ranges: list[tuple[int, int]] = []
522
- lines = content.split("\n")
523
- state = {"in_interface": False, "start_line": 0, "brace_count": 0}
524
-
525
- for i, line in enumerate(lines, start=1):
526
- stripped = line.strip()
527
- self._process_line_for_interface(stripped, i, state, ranges)
528
-
529
- return ranges
530
-
531
- def _process_line_for_interface(
532
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
533
- ) -> None:
534
- """Process single line for interface detection.
535
-
536
- Args:
537
- stripped: Stripped line content
538
- line_num: Line number
539
- state: Tracking state (in_interface, start_line, brace_count)
540
- ranges: Accumulated interface ranges
541
- """
542
- if self._is_interface_start(stripped):
543
- self._handle_interface_start(stripped, line_num, state, ranges)
544
- return
545
-
546
- if state["in_interface"]:
547
- self._handle_interface_continuation(stripped, line_num, state, ranges)
548
-
549
- def _is_interface_start(self, stripped: str) -> bool:
550
- """Check if line starts interface/type definition."""
551
- return stripped.startswith(("interface ", "type ")) and "{" in stripped
552
-
553
- def _handle_interface_start(
554
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
555
- ) -> None:
556
- """Handle start of interface definition."""
557
- state["in_interface"] = True
558
- state["start_line"] = line_num
559
- state["brace_count"] = stripped.count("{") - stripped.count("}")
560
-
561
- if state["brace_count"] == 0: # Single-line interface
562
- ranges.append((line_num, line_num))
563
- state["in_interface"] = False
564
-
565
- def _handle_interface_continuation(
566
- self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
567
- ) -> None:
568
- """Handle continuation of interface definition."""
569
- state["brace_count"] += stripped.count("{") - stripped.count("}")
570
- if state["brace_count"] == 0:
571
- ranges.append((state["start_line"], line_num))
572
- state["in_interface"] = False
573
-
574
- def _overlaps_interface(
575
- self, start: int, end: int, interface_ranges: list[tuple[int, int]]
576
- ) -> bool:
577
- """Check if block overlaps with any interface range.
578
-
579
- Args:
580
- start: Block start line
581
- end: Block end line
582
- interface_ranges: List of interface definition ranges
583
-
584
- Returns:
585
- True if block overlaps with an interface
586
- """
587
- for if_start, if_end in interface_ranges:
588
- if start <= if_end and end >= if_start:
589
- return True
590
- return False
@@ -0,0 +1,138 @@
1
+ """
2
+ Purpose: Extract TypeScript module-level constants using tree-sitter parsing
3
+
4
+ Scope: TypeScript constant extraction for duplicate constants detection
5
+
6
+ Overview: Extracts module-level constant definitions from TypeScript source code using tree-sitter.
7
+ Identifies constants as top-level `const` declarations where the variable name matches the
8
+ UPPER_SNAKE_CASE naming convention (e.g., const API_TIMEOUT = 30). Excludes non-const
9
+ declarations (let, var), class-level constants, and function-level constants to focus on
10
+ public module constants that should be consolidated across files.
11
+
12
+ Dependencies: tree-sitter, tree-sitter-typescript, re for pattern matching, ConstantInfo,
13
+ TypeScriptValueExtractor
14
+
15
+ Exports: TypeScriptConstantExtractor class
16
+
17
+ Interfaces: TypeScriptConstantExtractor.extract(content: str) -> list[ConstantInfo]
18
+
19
+ Implementation: Tree-sitter-based parsing with const declaration filtering and ALL_CAPS regex matching
20
+
21
+ Suppressions:
22
+ - type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
23
+ - broad-exception-caught: Defensive parsing for malformed TypeScript code
24
+ """
25
+
26
+ from typing import Any
27
+
28
+ from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE, TS_PARSER
29
+
30
+ from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
31
+ from .typescript_value_extractor import TypeScriptValueExtractor
32
+
33
+ if TREE_SITTER_AVAILABLE:
34
+ from tree_sitter import Node
35
+ else:
36
+ Node = Any # type: ignore[assignment,misc]
37
+
38
+ # Node types that represent values
39
+ VALUE_TYPES = frozenset(
40
+ (
41
+ "number",
42
+ "string",
43
+ "true",
44
+ "false",
45
+ "null",
46
+ "identifier",
47
+ "array",
48
+ "object",
49
+ "call_expression",
50
+ )
51
+ )
52
+
53
+
54
+ class TypeScriptConstantExtractor:
55
+ """Extracts module-level constants from TypeScript source code."""
56
+
57
+ def __init__(self) -> None:
58
+ """Initialize the TypeScript constant extractor."""
59
+ self.tree_sitter_available = TREE_SITTER_AVAILABLE
60
+ self._value_extractor = TypeScriptValueExtractor()
61
+
62
+ def extract(self, content: str) -> list[ConstantInfo]:
63
+ """Extract constants from TypeScript source code."""
64
+ root = _parse_content(content)
65
+ if root is None:
66
+ return []
67
+ constants: list[ConstantInfo] = []
68
+ for child in root.children:
69
+ constants.extend(self._extract_from_node(child, content))
70
+ return constants
71
+
72
+ def _extract_from_node(self, node: Node, content: str) -> list[ConstantInfo]:
73
+ """Extract constants from a single AST node."""
74
+ if node.type == "lexical_declaration":
75
+ return self._extract_from_lexical_declaration(node, content)
76
+ if node.type == "export_statement":
77
+ return self._extract_from_export(node, content)
78
+ return []
79
+
80
+ def _extract_from_lexical_declaration(self, node: Node, content: str) -> list[ConstantInfo]:
81
+ """Extract constants from a lexical declaration."""
82
+ if not _is_const_declaration(node):
83
+ return []
84
+ return [
85
+ info
86
+ for c in node.children
87
+ if c.type == "variable_declarator"
88
+ and (info := self._extract_from_declarator(c, content))
89
+ ]
90
+
91
+ def _extract_from_export(self, node: Node, content: str) -> list[ConstantInfo]:
92
+ """Extract constants from an export statement."""
93
+ for child in node.children:
94
+ if child.type == "lexical_declaration":
95
+ return self._extract_from_lexical_declaration(child, content)
96
+ return []
97
+
98
+ def _extract_from_declarator(self, node: Node, content: str) -> ConstantInfo | None:
99
+ """Extract constant info from a variable declarator."""
100
+ name, value = _get_name_and_value(node, content, self._value_extractor)
101
+ if not name or not _is_constant_name(name):
102
+ return None
103
+ return ConstantInfo(name=name, line_number=node.start_point[0] + 1, value=value)
104
+
105
+
106
+ def _parse_content(content: str) -> Node | None:
107
+ """Parse content and return root node, or None on failure."""
108
+ if not TREE_SITTER_AVAILABLE or TS_PARSER is None:
109
+ return None
110
+ try:
111
+ return TS_PARSER.parse(bytes(content, "utf8")).root_node
112
+ except Exception: # pylint: disable=broad-exception-caught
113
+ return None
114
+
115
+
116
+ def _is_const_declaration(node: Node) -> bool:
117
+ """Check if lexical declaration is a const."""
118
+ return any(child.type == "const" for child in node.children)
119
+
120
+
121
+ def _get_name_and_value(
122
+ node: Node, content: str, extractor: TypeScriptValueExtractor
123
+ ) -> tuple[str | None, str | None]:
124
+ """Extract name and value from declarator node."""
125
+ name = next(
126
+ (extractor.get_node_text(c, content) for c in node.children if c.type == "identifier"),
127
+ None,
128
+ )
129
+ value = next(
130
+ (extractor.get_value_string(c, content) for c in node.children if c.type in VALUE_TYPES),
131
+ None,
132
+ )
133
+ return name, value
134
+
135
+
136
+ def _is_constant_name(name: str) -> bool:
137
+ """Check if name matches constant naming convention."""
138
+ return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))