thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +38 -25
- src/core/base.py +7 -2
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +5 -2
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +120 -20
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +104 -10
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +54 -11
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +5 -4
- src/linters/dry/file_analyzer.py +4 -2
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +183 -48
- src/linters/dry/python_analyzer.py +60 -439
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/token_hasher.py +116 -112
- src/linters/dry/typescript_analyzer.py +68 -382
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +5 -4
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/atemporal_detector.py +68 -50
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +90 -16
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +36 -33
- src/linters/file_header/linter.py +140 -144
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +14 -58
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +13 -12
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +66 -34
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/context_analyzer.py +227 -225
- src/linters/magic_numbers/linter.py +28 -82
- src/linters/magic_numbers/python_analyzer.py +4 -16
- src/linters/magic_numbers/typescript_analyzer.py +9 -12
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/linter.py +24 -16
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -12
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/config.py +7 -12
- src/linters/print_statements/linter.py +26 -43
- src/linters/print_statements/python_analyzer.py +91 -93
- src/linters/print_statements/typescript_analyzer.py +15 -25
- src/linters/print_statements/violation_builder.py +12 -14
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +15 -16
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +252 -14
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +196 -0
- src/utils/project_root.py +3 -0
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1665
- thailint-0.5.0.dist-info/METADATA +0 -1286
- thailint-0.5.0.dist-info/RECORD +0 -96
- thailint-0.5.0.dist-info/entry_points.txt +0 -4
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
--- a/src/linters/dry/typescript_analyzer.py
+++ b/src/linters/dry/typescript_analyzer.py
@@ -19,6 +19,11 @@ Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, c
 Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
 single statement detection using tree-sitter AST patterns, and interface filtering logic
 
+Suppressions:
+- type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
+- invalid-name: Node type alias follows tree-sitter naming convention
+- srp.violation: Complex tree-sitter AST analysis algorithm. See SRP Exception below.
+
 SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
 Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
 false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
@@ -30,15 +35,17 @@ SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds
 responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
 """
 
-from collections.abc import Generator
+from collections.abc import Iterable
 from pathlib import Path
 
 from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
 
+from . import token_hasher
 from .base_token_analyzer import BaseTokenAnalyzer
 from .block_filter import BlockFilterRegistry, create_default_registry
 from .cache import CodeBlock
 from .config import DRYConfig
+from .typescript_statement_detector import is_single_statement, should_include_block
 
 if TREE_SITTER_AVAILABLE:
     from tree_sitter import Node
@@ -84,16 +91,33 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.vi
         # Generate rolling hash windows
         windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
 
-
-
-
-
-
+        # Filter out interface/type definitions and single statement patterns
+        valid_windows = (
+            (hash_val, start_line, end_line, snippet)
+            for hash_val, start_line, end_line, snippet in windows
+            if should_include_block(content, start_line, end_line)
+            and not is_single_statement(content, start_line, end_line)
+        )
+        return self._build_blocks(valid_windows, file_path, content)
+
+    def _build_blocks(
+        self,
+        windows: Iterable[tuple[int, int, int, str]],
+        file_path: Path,
+        content: str,
+    ) -> list[CodeBlock]:
+        """Build CodeBlock objects from valid windows, applying filters.
 
-
-
-
+        Args:
+            windows: Iterable of (hash_val, start_line, end_line, snippet) tuples
+            file_path: Path to source file
+            content: File content
 
+        Returns:
+            List of CodeBlock instances that pass all filters
+        """
+        blocks = []
+        for hash_val, start_line, end_line, snippet in windows:
             block = CodeBlock(
                 file_path=file_path,
                 start_line=start_line,
@@ -101,13 +125,8 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.vi
                 snippet=snippet,
                 hash_value=hash_val,
             )
-
-
-            if self._filter_registry.should_filter_block(block, content):
-                continue
-
-            blocks.append(block)
-
+            if not self._filter_registry.should_filter_block(block, content):
+                blocks.append(block)
         return blocks
 
     def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
@@ -188,26 +207,44 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.vi
         lines_with_numbers = []
         in_multiline_import = False
 
-
-
-
-
-
-
-
-
-
-            # Update multi-line import state and check if line should be skipped
-            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+        # Skip JSDoc comment lines
+        non_jsdoc_lines = (
+            (line_num, line)
+            for line_num, line in enumerate(content.split("\n"), start=1)
+            if line_num not in jsdoc_lines
+        )
+        for line_num, line in non_jsdoc_lines:
+            in_multiline_import, normalized = self._normalize_and_filter_line(
                 line, in_multiline_import
             )
-            if
-
-
-                lines_with_numbers.append((line_num, line))
+            if normalized is not None:
+                lines_with_numbers.append((line_num, normalized))
 
         return lines_with_numbers
 
+    def _normalize_and_filter_line(
+        self, line: str, in_multiline_import: bool
+    ) -> tuple[bool, str | None]:
+        """Normalize line and check if it should be included.
+
+        Args:
+            line: Raw source line
+            in_multiline_import: Current multi-line import state
+
+        Returns:
+            Tuple of (new_import_state, normalized_line or None if should skip)
+        """
+        normalized = token_hasher.normalize_line(line)
+        if not normalized:
+            return in_multiline_import, None
+
+        new_state, should_skip = token_hasher.should_skip_import_line(
+            normalized, in_multiline_import
+        )
+        if should_skip:
+            return new_state, None
+        return new_state, normalized
+
     def _rolling_hash_with_tracking(
         self, lines_with_numbers: list[tuple[int, str]], window_size: int
     ) -> list[tuple[int, int, int, str]]:
@@ -239,354 +276,3 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.vi
             hashes.append((hash_val, start_line, end_line, snippet))
 
         return hashes
-
-    def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
-        """Filter out blocks that overlap with interface/type definitions.
-
-        Args:
-            content: File content
-            start_line: Block start line
-            end_line: Block end line
-
-        Returns:
-            False if block overlaps interface definition, True otherwise
-        """
-        interface_ranges = self._find_interface_ranges(content)
-        return not self._overlaps_interface(start_line, end_line, interface_ranges)
-
-    def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
-        """Check if a line range in the original source is a single logical statement.
-
-        Uses tree-sitter AST analysis to detect patterns like:
-        - Decorators (@Component(...))
-        - Function call arguments
-        - Object literal properties
-        - Class field definitions
-        - Type assertions
-        - Chained method calls (single expression)
-
-        Args:
-            content: TypeScript source code
-            start_line: Starting line number (1-indexed)
-            end_line: Ending line number (1-indexed)
-
-        Returns:
-            True if this range represents a single logical statement/expression
-        """
-        if not TREE_SITTER_AVAILABLE:
-            return False
-
-        from src.analyzers.typescript_base import TypeScriptBaseAnalyzer
-
-        analyzer = TypeScriptBaseAnalyzer()
-        root = analyzer.parse_typescript(content)
-        if not root:
-            return False
-
-        return self._check_overlapping_nodes(root, start_line, end_line)
-
-    def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
-        """Check if any AST node overlaps and matches single-statement pattern.
-
-        Args:
-            root: Root tree-sitter node
-            start_line: Starting line number (1-indexed)
-            end_line: Ending line number (1-indexed)
-
-        Returns:
-            True if any node matches single-statement pattern
-        """
-        # Convert to 0-indexed for tree-sitter
-        ts_start = start_line - 1
-        ts_end = end_line - 1
-
-        for node in self._walk_nodes(root):
-            if self._node_overlaps_and_matches(node, ts_start, ts_end):
-                return True
-        return False
-
-    def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
-        """Generator to walk all nodes in tree.
-
-        Args:
-            node: Starting node
-
-        Yields:
-            All nodes in tree
-        """
-        yield node
-        for child in node.children:
-            yield from self._walk_nodes(child)
-
-    def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
-        """Check if node overlaps with range and matches single-statement pattern.
-
-        Args:
-            node: Tree-sitter node
-            ts_start: Starting line (0-indexed)
-            ts_end: Ending line (0-indexed)
-
-        Returns:
-            True if node overlaps and matches pattern
-        """
-        node_start = node.start_point[0]
-        node_end = node.end_point[0]
-
-        # Check if ranges overlap
-        overlaps = not (node_end < ts_start or node_start > ts_end)
-        if not overlaps:
-            return False
-
-        return self._is_single_statement_pattern(node, ts_start, ts_end)
-
-    def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
-        """Check if node is a simple container pattern (decorator, object, etc.).
-
-        Args:
-            node: AST node to check
-            contains: Whether node contains the range
-
-        Returns:
-            True if node matches simple container pattern
-        """
-        simple_types = (
-            "decorator",
-            "object",
-            "member_expression",
-            "as_expression",
-            "array_pattern",
-        )
-        return node.type in simple_types and contains
-
-    def _matches_call_expression_pattern(
-        self, node: Node, ts_start: int, ts_end: int, contains: bool
-    ) -> bool:
-        """Check if node is a call expression pattern.
-
-        Args:
-            node: AST node to check
-            ts_start: Starting line (0-indexed)
-            ts_end: Ending line (0-indexed)
-            contains: Whether node contains the range
-
-        Returns:
-            True if node matches call expression pattern
-        """
-        if node.type != "call_expression":
-            return False
-
-        # Check if this is a multi-line call containing the range
-        node_start = node.start_point[0]
-        node_end = node.end_point[0]
-        is_multiline = node_start < node_end
-        if is_multiline and node_start <= ts_start <= node_end:
-            return True
-
-        return contains
-
-    def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
-        """Check if node is a lexical declaration pattern.
-
-        Args:
-            node: AST node to check
-            contains: Whether node contains the range
-
-        Returns:
-            True if node matches declaration pattern (excluding function bodies)
-        """
-        if node.type != "lexical_declaration" or not contains:
-            return False
-
-        # Only filter if simple value assignment, NOT a function body
-        if self._contains_function_body(node):
-            return False
-
-        return True
-
-    def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
-        """Check if node is a JSX element pattern.
-
-        Args:
-            node: AST node to check
-            contains: Whether node contains the range
-
-        Returns:
-            True if node matches JSX pattern
-        """
-        jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
-        return node.type in jsx_types and contains
-
-    def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
-        """Check if node is a class body field definition pattern.
-
-        Args:
-            node: AST node to check
-            ts_start: Starting line (0-indexed)
-            ts_end: Ending line (0-indexed)
-
-        Returns:
-            True if node is class body with field definitions
-        """
-        if node.type != "class_body":
-            return False
-
-        return self._is_in_class_field_area(node, ts_start, ts_end)
-
-    def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
-        """Check if an AST node represents a single-statement pattern to filter.
-
-        Delegates to specialized pattern matchers for different AST node categories.
-
-        Args:
-            node: AST node that overlaps with the line range
-            ts_start: Starting line number (0-indexed)
-            ts_end: Ending line number (0-indexed)
-
-        Returns:
-            True if this node represents a single logical statement pattern
-        """
-        node_start = node.start_point[0]
-        node_end = node.end_point[0]
-        contains = (node_start <= ts_start) and (node_end >= ts_end)
-
-        # Check pattern categories using specialized helpers - use list for any()
-        matchers = [
-            self._matches_simple_container_pattern(node, contains),
-            self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
-            self._matches_declaration_pattern(node, contains),
-            self._matches_jsx_pattern(node, contains),
-            self._matches_class_body_pattern(node, ts_start, ts_end),
-        ]
-        return any(matchers)
-
-    def _contains_function_body(self, node: Node) -> bool:
-        """Check if node contains an arrow function or function expression.
-
-        Args:
-            node: Node to check
-
-        Returns:
-            True if node contains a function with a body
-        """
-        for child in node.children:
-            if child.type in ("arrow_function", "function", "function_expression"):
-                return True
-            if self._contains_function_body(child):
-                return True
-        return False
-
-    def _find_first_method_line(self, class_body: Node) -> int | None:
-        """Find line number of first method in class body.
-
-        Args:
-            class_body: Class body node
-
-        Returns:
-            Line number of first method or None if no methods
-        """
-        for child in class_body.children:
-            if child.type in ("method_definition", "function_declaration"):
-                return child.start_point[0]
-        return None
-
-    def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
-        """Check if range is in class field definition area (before methods).
-
-        Args:
-            class_body: Class body node
-            ts_start: Starting line (0-indexed)
-            ts_end: Ending line (0-indexed)
-
-        Returns:
-            True if range is in field area
-        """
-        first_method_line = self._find_first_method_line(class_body)
-        class_start = class_body.start_point[0]
-        class_end = class_body.end_point[0]
-
-        # No methods: check if range is in class body
-        if first_method_line is None:
-            return class_start <= ts_start and class_end >= ts_end
-
-        # Has methods: check if range is before first method
-        return class_start <= ts_start and ts_end < first_method_line
-
-    def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
-        """Find line ranges of interface/type definitions.
-
-        Args:
-            content: File content
-
-        Returns:
-            List of (start_line, end_line) tuples for interface blocks
-        """
-        ranges: list[tuple[int, int]] = []
-        lines = content.split("\n")
-        state = {"in_interface": False, "start_line": 0, "brace_count": 0}
-
-        for i, line in enumerate(lines, start=1):
-            stripped = line.strip()
-            self._process_line_for_interface(stripped, i, state, ranges)
-
-        return ranges
-
-    def _process_line_for_interface(
-        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
-    ) -> None:
-        """Process single line for interface detection.
-
-        Args:
-            stripped: Stripped line content
-            line_num: Line number
-            state: Tracking state (in_interface, start_line, brace_count)
-            ranges: Accumulated interface ranges
-        """
-        if self._is_interface_start(stripped):
-            self._handle_interface_start(stripped, line_num, state, ranges)
-            return
-
-        if state["in_interface"]:
-            self._handle_interface_continuation(stripped, line_num, state, ranges)
-
-    def _is_interface_start(self, stripped: str) -> bool:
-        """Check if line starts interface/type definition."""
-        return stripped.startswith(("interface ", "type ")) and "{" in stripped
-
-    def _handle_interface_start(
-        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
-    ) -> None:
-        """Handle start of interface definition."""
-        state["in_interface"] = True
-        state["start_line"] = line_num
-        state["brace_count"] = stripped.count("{") - stripped.count("}")
-
-        if state["brace_count"] == 0:  # Single-line interface
-            ranges.append((line_num, line_num))
-            state["in_interface"] = False
-
-    def _handle_interface_continuation(
-        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
-    ) -> None:
-        """Handle continuation of interface definition."""
-        state["brace_count"] += stripped.count("{") - stripped.count("}")
-        if state["brace_count"] == 0:
-            ranges.append((state["start_line"], line_num))
-            state["in_interface"] = False
-
-    def _overlaps_interface(
-        self, start: int, end: int, interface_ranges: list[tuple[int, int]]
-    ) -> bool:
-        """Check if block overlaps with any interface range.
-
-        Args:
-            start: Block start line
-            end: Block end line
-            interface_ranges: List of interface definition ranges
-
-        Returns:
-            True if block overlaps with an interface
-        """
-        for if_start, if_end in interface_ranges:
-            if start <= if_end and end >= if_start:
-                return True
-        return False
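The analyze() change above replaces the removed in-class filtering methods with a lazy generator pipeline over the rolling-hash windows, delegating the interface-overlap and single-statement checks to module-level helpers in typescript_statement_detector. Below is a minimal, self-contained sketch of that filtering pattern; the two predicates are simplified stand-ins written for illustration only, not the tree-sitter-based implementations shipped in the wheel, and the Window alias and filter_windows name are hypothetical.

```python
# Sketch of the generator-based window filtering used by the new analyze() path.
# should_include_block / is_single_statement are simplified stand-ins; the real
# checks live in src/linters/dry/typescript_statement_detector.py.
from collections.abc import Iterable, Iterator

Window = tuple[int, int, int, str]  # (hash_val, start_line, end_line, snippet)


def should_include_block(content: str, start: int, end: int) -> bool:
    """Stand-in: reject windows that overlap interface/type definitions."""
    lines = content.split("\n")[start - 1 : end]
    return not any(line.lstrip().startswith(("interface ", "type ")) for line in lines)


def is_single_statement(content: str, start: int, end: int) -> bool:
    """Stand-in: treat a window with at most one terminator as one statement."""
    block = " ".join(content.split("\n")[start - 1 : end])
    return block.count(";") <= 1


def filter_windows(windows: Iterable[Window], content: str) -> Iterator[Window]:
    """Lazily drop uninteresting windows before any blocks are built."""
    return (
        (hash_val, start, end, snippet)
        for hash_val, start, end, snippet in windows
        if should_include_block(content, start, end)
        and not is_single_statement(content, start, end)
    )
```

Because the filtering is a generator expression, a window is only materialized into a CodeBlock after it has passed both checks, which matches how _build_blocks consumes valid_windows in the diff above.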
--- /dev/null
+++ b/src/linters/dry/typescript_constant_extractor.py
@@ -0,0 +1,138 @@
+"""
+Purpose: Extract TypeScript module-level constants using tree-sitter parsing
+
+Scope: TypeScript constant extraction for duplicate constants detection
+
+Overview: Extracts module-level constant definitions from TypeScript source code using tree-sitter.
+Identifies constants as top-level `const` declarations where the variable name matches the
+UPPER_SNAKE_CASE naming convention (e.g., const API_TIMEOUT = 30). Excludes non-const
+declarations (let, var), class-level constants, and function-level constants to focus on
+public module constants that should be consolidated across files.
+
+Dependencies: tree-sitter, tree-sitter-typescript, re for pattern matching, ConstantInfo,
+TypeScriptValueExtractor
+
+Exports: TypeScriptConstantExtractor class
+
+Interfaces: TypeScriptConstantExtractor.extract(content: str) -> list[ConstantInfo]
+
+Implementation: Tree-sitter-based parsing with const declaration filtering and ALL_CAPS regex matching
+
+Suppressions:
+- type:ignore[assignment,misc]: Tree-sitter Node type alias (optional dependency fallback)
+- broad-exception-caught: Defensive parsing for malformed TypeScript code
+"""
+
+from typing import Any
+
+from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE, TS_PARSER
+
+from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
+from .typescript_value_extractor import TypeScriptValueExtractor
+
+if TREE_SITTER_AVAILABLE:
+    from tree_sitter import Node
+else:
+    Node = Any  # type: ignore[assignment,misc]
+
+# Node types that represent values
+VALUE_TYPES = frozenset(
+    (
+        "number",
+        "string",
+        "true",
+        "false",
+        "null",
+        "identifier",
+        "array",
+        "object",
+        "call_expression",
+    )
+)
+
+
+class TypeScriptConstantExtractor:
+    """Extracts module-level constants from TypeScript source code."""
+
+    def __init__(self) -> None:
+        """Initialize the TypeScript constant extractor."""
+        self.tree_sitter_available = TREE_SITTER_AVAILABLE
+        self._value_extractor = TypeScriptValueExtractor()
+
+    def extract(self, content: str) -> list[ConstantInfo]:
+        """Extract constants from TypeScript source code."""
+        root = _parse_content(content)
+        if root is None:
+            return []
+        constants: list[ConstantInfo] = []
+        for child in root.children:
+            constants.extend(self._extract_from_node(child, content))
+        return constants
+
+    def _extract_from_node(self, node: Node, content: str) -> list[ConstantInfo]:
+        """Extract constants from a single AST node."""
+        if node.type == "lexical_declaration":
+            return self._extract_from_lexical_declaration(node, content)
+        if node.type == "export_statement":
+            return self._extract_from_export(node, content)
+        return []
+
+    def _extract_from_lexical_declaration(self, node: Node, content: str) -> list[ConstantInfo]:
+        """Extract constants from a lexical declaration."""
+        if not _is_const_declaration(node):
+            return []
+        return [
+            info
+            for c in node.children
+            if c.type == "variable_declarator"
+            and (info := self._extract_from_declarator(c, content))
+        ]
+
+    def _extract_from_export(self, node: Node, content: str) -> list[ConstantInfo]:
+        """Extract constants from an export statement."""
+        for child in node.children:
+            if child.type == "lexical_declaration":
+                return self._extract_from_lexical_declaration(child, content)
+        return []
+
+    def _extract_from_declarator(self, node: Node, content: str) -> ConstantInfo | None:
+        """Extract constant info from a variable declarator."""
+        name, value = _get_name_and_value(node, content, self._value_extractor)
+        if not name or not _is_constant_name(name):
+            return None
+        return ConstantInfo(name=name, line_number=node.start_point[0] + 1, value=value)
+
+
+def _parse_content(content: str) -> Node | None:
+    """Parse content and return root node, or None on failure."""
+    if not TREE_SITTER_AVAILABLE or TS_PARSER is None:
+        return None
+    try:
+        return TS_PARSER.parse(bytes(content, "utf8")).root_node
+    except Exception:  # pylint: disable=broad-exception-caught
+        return None
+
+
+def _is_const_declaration(node: Node) -> bool:
+    """Check if lexical declaration is a const."""
+    return any(child.type == "const" for child in node.children)
+
+
+def _get_name_and_value(
+    node: Node, content: str, extractor: TypeScriptValueExtractor
+) -> tuple[str | None, str | None]:
+    """Extract name and value from declarator node."""
+    name = next(
+        (extractor.get_node_text(c, content) for c in node.children if c.type == "identifier"),
+        None,
+    )
+    value = next(
+        (extractor.get_value_string(c, content) for c in node.children if c.type in VALUE_TYPES),
+        None,
+    )
+    return name, value
+
+
+def _is_constant_name(name: str) -> bool:
+    """Check if name matches constant naming convention."""
+    return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))