thailint 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +7 -2
- src/analyzers/__init__.py +23 -0
- src/analyzers/typescript_base.py +148 -0
- src/api.py +1 -1
- src/cli.py +498 -141
- src/config.py +6 -31
- src/core/base.py +12 -0
- src/core/cli_utils.py +206 -0
- src/core/config_parser.py +99 -0
- src/core/linter_utils.py +168 -0
- src/core/registry.py +17 -92
- src/core/rule_discovery.py +132 -0
- src/core/violation_builder.py +122 -0
- src/linter_config/ignore.py +112 -40
- src/linter_config/loader.py +3 -13
- src/linters/dry/__init__.py +23 -0
- src/linters/dry/base_token_analyzer.py +76 -0
- src/linters/dry/block_filter.py +262 -0
- src/linters/dry/block_grouper.py +59 -0
- src/linters/dry/cache.py +218 -0
- src/linters/dry/cache_query.py +61 -0
- src/linters/dry/config.py +130 -0
- src/linters/dry/config_loader.py +44 -0
- src/linters/dry/deduplicator.py +120 -0
- src/linters/dry/duplicate_storage.py +126 -0
- src/linters/dry/file_analyzer.py +127 -0
- src/linters/dry/inline_ignore.py +140 -0
- src/linters/dry/linter.py +170 -0
- src/linters/dry/python_analyzer.py +517 -0
- src/linters/dry/storage_initializer.py +51 -0
- src/linters/dry/token_hasher.py +115 -0
- src/linters/dry/typescript_analyzer.py +590 -0
- src/linters/dry/violation_builder.py +74 -0
- src/linters/dry/violation_filter.py +91 -0
- src/linters/dry/violation_generator.py +174 -0
- src/linters/file_placement/config_loader.py +86 -0
- src/linters/file_placement/directory_matcher.py +80 -0
- src/linters/file_placement/linter.py +252 -472
- src/linters/file_placement/path_resolver.py +61 -0
- src/linters/file_placement/pattern_matcher.py +55 -0
- src/linters/file_placement/pattern_validator.py +106 -0
- src/linters/file_placement/rule_checker.py +229 -0
- src/linters/file_placement/violation_factory.py +177 -0
- src/linters/nesting/config.py +13 -3
- src/linters/nesting/linter.py +76 -152
- src/linters/nesting/typescript_analyzer.py +38 -102
- src/linters/nesting/typescript_function_extractor.py +130 -0
- src/linters/nesting/violation_builder.py +139 -0
- src/linters/srp/__init__.py +99 -0
- src/linters/srp/class_analyzer.py +113 -0
- src/linters/srp/config.py +76 -0
- src/linters/srp/heuristics.py +89 -0
- src/linters/srp/linter.py +225 -0
- src/linters/srp/metrics_evaluator.py +47 -0
- src/linters/srp/python_analyzer.py +72 -0
- src/linters/srp/typescript_analyzer.py +75 -0
- src/linters/srp/typescript_metrics_calculator.py +90 -0
- src/linters/srp/violation_builder.py +117 -0
- src/orchestrator/core.py +42 -7
- src/utils/__init__.py +4 -0
- src/utils/project_root.py +84 -0
- {thailint-0.1.6.dist-info → thailint-0.2.0.dist-info}/METADATA +414 -63
- thailint-0.2.0.dist-info/RECORD +75 -0
- src/.ai/layout.yaml +0 -48
- thailint-0.1.6.dist-info/RECORD +0 -28
- {thailint-0.1.6.dist-info → thailint-0.2.0.dist-info}/LICENSE +0 -0
- {thailint-0.1.6.dist-info → thailint-0.2.0.dist-info}/WHEEL +0 -0
- {thailint-0.1.6.dist-info → thailint-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,590 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: TypeScript/JavaScript source code tokenization and duplicate block analysis
|
|
3
|
+
|
|
4
|
+
Scope: TypeScript and JavaScript file analysis for duplicate detection
|
|
5
|
+
|
|
6
|
+
Overview: Analyzes TypeScript and JavaScript source files to extract code blocks for duplicate
|
|
7
|
+
detection. Inherits from BaseTokenAnalyzer for common token-based hashing and rolling hash
|
|
8
|
+
window logic. Adds TypeScript-specific filtering to exclude JSDoc comments, single statements
|
|
9
|
+
(decorators, function calls, object literals, class fields), and interface/type definitions.
|
|
10
|
+
Uses tree-sitter for AST-based filtering to achieve same sophistication as Python analyzer.
|
|
11
|
+
|
|
12
|
+
Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, tree-sitter
|
|
13
|
+
|
|
14
|
+
Exports: TypeScriptDuplicateAnalyzer class
|
|
15
|
+
|
|
16
|
+
Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, config: DRYConfig)
|
|
17
|
+
-> list[CodeBlock]
|
|
18
|
+
|
|
19
|
+
Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
|
|
20
|
+
single statement detection using tree-sitter AST patterns, and interface filtering logic
|
|
21
|
+
|
|
22
|
+
SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
|
|
23
|
+
Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
|
|
24
|
+
false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
|
|
25
|
+
pipeline: JSDoc extraction, tokenization with line tracking, single-statement pattern detection across
|
|
26
|
+
10+ AST node types (decorators, call_expression, object, class_body, member_expression, as_expression,
|
|
27
|
+
jsx elements, array_pattern), and context-aware filtering. Similar to parser or compiler pass architecture
|
|
28
|
+
where algorithmic cohesion is critical. Splitting would fragment the algorithm logic and make maintenance
|
|
29
|
+
harder by separating interdependent tree-sitter AST analysis steps. All methods contribute to single
|
|
30
|
+
responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from collections.abc import Generator
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
|
|
37
|
+
|
|
38
|
+
from .base_token_analyzer import BaseTokenAnalyzer
|
|
39
|
+
from .block_filter import BlockFilterRegistry, create_default_registry
|
|
40
|
+
from .cache import CodeBlock
|
|
41
|
+
from .config import DRYConfig
|
|
42
|
+
|
|
43
|
+
if TREE_SITTER_AVAILABLE:
    from tree_sitter import Node
else:
    # Fallback sentinel: lets `node: Node` annotations evaluate (and the module
    # import succeed) when the optional tree-sitter dependency is absent.
    Node = None  # type: ignore[assignment,misc] # pylint: disable=invalid-name
|
|
49
|
+
class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violation]
|
|
50
|
+
"""Analyzes TypeScript/JavaScript code for duplicate blocks.
|
|
51
|
+
|
|
52
|
+
SRP suppression: Complex tree-sitter AST analysis algorithm requires 20 methods to implement
|
|
53
|
+
sophisticated duplicate detection with false positive filtering. See file header for justification.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, filter_registry: BlockFilterRegistry | None = None):
|
|
57
|
+
"""Initialize analyzer with optional custom filter registry.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
filter_registry: Custom filter registry (uses defaults if None)
|
|
61
|
+
"""
|
|
62
|
+
super().__init__()
|
|
63
|
+
self._filter_registry = filter_registry or create_default_registry()
|
|
64
|
+
|
|
65
|
+
def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
|
|
66
|
+
"""Analyze TypeScript/JavaScript file for duplicate code blocks.
|
|
67
|
+
|
|
68
|
+
Filters out JSDoc comments, single statements, and interface definitions.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
file_path: Path to source file
|
|
72
|
+
content: File content
|
|
73
|
+
config: DRY configuration
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
List of CodeBlock instances with hash values
|
|
77
|
+
"""
|
|
78
|
+
# Get JSDoc comment line ranges
|
|
79
|
+
jsdoc_ranges = self._get_jsdoc_ranges_from_content(content)
|
|
80
|
+
|
|
81
|
+
# Tokenize with line number tracking, skipping JSDoc lines
|
|
82
|
+
lines_with_numbers = self._tokenize_with_line_numbers(content, jsdoc_ranges)
|
|
83
|
+
|
|
84
|
+
# Generate rolling hash windows
|
|
85
|
+
windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
|
|
86
|
+
|
|
87
|
+
blocks = []
|
|
88
|
+
for hash_val, start_line, end_line, snippet in windows:
|
|
89
|
+
# Filter interface/type definitions
|
|
90
|
+
if not self._should_include_block(content, start_line, end_line):
|
|
91
|
+
continue
|
|
92
|
+
|
|
93
|
+
# Filter single statement patterns
|
|
94
|
+
if self._is_single_statement_in_source(content, start_line, end_line):
|
|
95
|
+
continue
|
|
96
|
+
|
|
97
|
+
block = CodeBlock(
|
|
98
|
+
file_path=file_path,
|
|
99
|
+
start_line=start_line,
|
|
100
|
+
end_line=end_line,
|
|
101
|
+
snippet=snippet,
|
|
102
|
+
hash_value=hash_val,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Apply extensible filters (keyword arguments, imports, etc.)
|
|
106
|
+
if self._filter_registry.should_filter_block(block, content):
|
|
107
|
+
continue
|
|
108
|
+
|
|
109
|
+
blocks.append(block)
|
|
110
|
+
|
|
111
|
+
return blocks
|
|
112
|
+
|
|
113
|
+
    def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
        """Extract line numbers that are part of JSDoc comments.

        Args:
            content: TypeScript/JavaScript source code

        Returns:
            Set of line numbers (1-indexed) that are part of JSDoc comments.
            Empty when tree-sitter is unavailable or parsing fails — JSDoc
            filtering is then silently skipped.
        """
        if not TREE_SITTER_AVAILABLE:
            return set()

        # Local import keeps tree-sitter optional at module import time.
        from src.analyzers.typescript_base import TypeScriptBaseAnalyzer

        analyzer = TypeScriptBaseAnalyzer()
        root = analyzer.parse_typescript(content)
        if not root:
            return set()

        jsdoc_lines: set[int] = set()
        self._collect_jsdoc_lines_recursive(root, jsdoc_lines)
        return jsdoc_lines
|
136
|
+
def _is_jsdoc_comment(self, node: Node) -> bool:
|
|
137
|
+
"""Check if node is a JSDoc comment.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
node: Tree-sitter node to check
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
True if node is JSDoc comment (/** ... */)
|
|
144
|
+
"""
|
|
145
|
+
if node.type != "comment":
|
|
146
|
+
return False
|
|
147
|
+
|
|
148
|
+
text = node.text.decode() if node.text else ""
|
|
149
|
+
return text.startswith("/**")
|
|
150
|
+
|
|
151
|
+
def _add_comment_lines_to_set(self, node: Node, jsdoc_lines: set[int]) -> None:
|
|
152
|
+
"""Add comment node's line range to set.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
node: Comment node
|
|
156
|
+
jsdoc_lines: Set to add line numbers to
|
|
157
|
+
"""
|
|
158
|
+
start_line = node.start_point[0] + 1
|
|
159
|
+
end_line = node.end_point[0] + 1
|
|
160
|
+
for line_num in range(start_line, end_line + 1):
|
|
161
|
+
jsdoc_lines.add(line_num)
|
|
162
|
+
|
|
163
|
+
    def _collect_jsdoc_lines_recursive(self, node: Node, jsdoc_lines: set[int]) -> None:
        """Recursively collect JSDoc comment line ranges.

        Depth-first walk over the whole subtree; mutates *jsdoc_lines* in place.

        Args:
            node: Tree-sitter node to examine
            jsdoc_lines: Set to accumulate line numbers (1-indexed)
        """
        if self._is_jsdoc_comment(node):
            self._add_comment_lines_to_set(node, jsdoc_lines)

        # Always descend: comments can appear at any depth (class bodies,
        # function bodies, etc.).
        for child in node.children:
            self._collect_jsdoc_lines_recursive(child, jsdoc_lines)
|
176
|
+
    def _tokenize_with_line_numbers(
        self, content: str, jsdoc_lines: set[int]
    ) -> list[tuple[int, str]]:
        """Tokenize code while tracking original line numbers and skipping JSDoc.

        Args:
            content: Source code
            jsdoc_lines: Set of line numbers (1-indexed) that are JSDoc comments

        Returns:
            List of (original_line_number, normalized_code) tuples. JSDoc
            lines, blank/comment-only lines, and import statements are dropped.
        """
        lines_with_numbers = []

        for line_num, line in enumerate(content.split("\n"), start=1):
            # Skip JSDoc comment lines
            if line_num in jsdoc_lines:
                continue

            # Reuse the hasher's existing normalization so hashes stay
            # consistent with other analyzers (protected access is deliberate).
            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
            line = " ".join(line.split())  # collapse whitespace runs to single spaces

            if not line:
                continue

            if self._hasher._is_import_statement(line):  # pylint: disable=protected-access
                continue

            lines_with_numbers.append((line_num, line))

        return lines_with_numbers
|
209
|
+
def _rolling_hash_with_tracking(
|
|
210
|
+
self, lines_with_numbers: list[tuple[int, str]], window_size: int
|
|
211
|
+
) -> list[tuple[int, int, int, str]]:
|
|
212
|
+
"""Create rolling hash windows while preserving original line numbers.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
lines_with_numbers: List of (line_number, code) tuples
|
|
216
|
+
window_size: Number of lines per window
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
List of (hash_value, start_line, end_line, snippet) tuples
|
|
220
|
+
"""
|
|
221
|
+
if len(lines_with_numbers) < window_size:
|
|
222
|
+
return []
|
|
223
|
+
|
|
224
|
+
hashes = []
|
|
225
|
+
for i in range(len(lines_with_numbers) - window_size + 1):
|
|
226
|
+
window = lines_with_numbers[i : i + window_size]
|
|
227
|
+
|
|
228
|
+
# Extract just the code for hashing
|
|
229
|
+
code_lines = [code for _, code in window]
|
|
230
|
+
snippet = "\n".join(code_lines)
|
|
231
|
+
hash_val = hash(snippet)
|
|
232
|
+
|
|
233
|
+
# Get original line numbers
|
|
234
|
+
start_line = window[0][0]
|
|
235
|
+
end_line = window[-1][0]
|
|
236
|
+
|
|
237
|
+
hashes.append((hash_val, start_line, end_line, snippet))
|
|
238
|
+
|
|
239
|
+
return hashes
|
|
240
|
+
|
|
241
|
+
def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
|
|
242
|
+
"""Filter out blocks that overlap with interface/type definitions.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
content: File content
|
|
246
|
+
start_line: Block start line
|
|
247
|
+
end_line: Block end line
|
|
248
|
+
|
|
249
|
+
Returns:
|
|
250
|
+
False if block overlaps interface definition, True otherwise
|
|
251
|
+
"""
|
|
252
|
+
interface_ranges = self._find_interface_ranges(content)
|
|
253
|
+
return not self._overlaps_interface(start_line, end_line, interface_ranges)
|
|
254
|
+
|
|
255
|
+
    def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
        """Check if a line range in the original source is a single logical statement.

        Uses tree-sitter AST analysis to detect patterns like:
        - Decorators (@Component(...))
        - Function call arguments
        - Object literal properties
        - Class field definitions
        - Type assertions
        - Chained method calls (single expression)

        NOTE(review): this re-parses the whole file with tree-sitter on every
        call, and analyze() invokes it once per hash window — O(windows x file
        size). Consider parsing once per file and reusing the root; confirm
        with profiling before changing.

        Args:
            content: TypeScript source code
            start_line: Starting line number (1-indexed)
            end_line: Ending line number (1-indexed)

        Returns:
            True if this range represents a single logical statement/expression.
            Always False when tree-sitter is unavailable or parsing fails.
        """
        if not TREE_SITTER_AVAILABLE:
            return False

        # Local import keeps tree-sitter optional at module import time.
        from src.analyzers.typescript_base import TypeScriptBaseAnalyzer

        analyzer = TypeScriptBaseAnalyzer()
        root = analyzer.parse_typescript(content)
        if not root:
            return False

        return self._check_overlapping_nodes(root, start_line, end_line)
|
286
|
+
def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
|
|
287
|
+
"""Check if any AST node overlaps and matches single-statement pattern.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
root: Root tree-sitter node
|
|
291
|
+
start_line: Starting line number (1-indexed)
|
|
292
|
+
end_line: Ending line number (1-indexed)
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
True if any node matches single-statement pattern
|
|
296
|
+
"""
|
|
297
|
+
# Convert to 0-indexed for tree-sitter
|
|
298
|
+
ts_start = start_line - 1
|
|
299
|
+
ts_end = end_line - 1
|
|
300
|
+
|
|
301
|
+
for node in self._walk_nodes(root):
|
|
302
|
+
if self._node_overlaps_and_matches(node, ts_start, ts_end):
|
|
303
|
+
return True
|
|
304
|
+
return False
|
|
305
|
+
|
|
306
|
+
def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
|
|
307
|
+
"""Generator to walk all nodes in tree.
|
|
308
|
+
|
|
309
|
+
Args:
|
|
310
|
+
node: Starting node
|
|
311
|
+
|
|
312
|
+
Yields:
|
|
313
|
+
All nodes in tree
|
|
314
|
+
"""
|
|
315
|
+
yield node
|
|
316
|
+
for child in node.children:
|
|
317
|
+
yield from self._walk_nodes(child)
|
|
318
|
+
|
|
319
|
+
def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
|
|
320
|
+
"""Check if node overlaps with range and matches single-statement pattern.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
node: Tree-sitter node
|
|
324
|
+
ts_start: Starting line (0-indexed)
|
|
325
|
+
ts_end: Ending line (0-indexed)
|
|
326
|
+
|
|
327
|
+
Returns:
|
|
328
|
+
True if node overlaps and matches pattern
|
|
329
|
+
"""
|
|
330
|
+
node_start = node.start_point[0]
|
|
331
|
+
node_end = node.end_point[0]
|
|
332
|
+
|
|
333
|
+
# Check if ranges overlap
|
|
334
|
+
overlaps = not (node_end < ts_start or node_start > ts_end)
|
|
335
|
+
if not overlaps:
|
|
336
|
+
return False
|
|
337
|
+
|
|
338
|
+
return self._is_single_statement_pattern(node, ts_start, ts_end)
|
|
339
|
+
|
|
340
|
+
def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
|
|
341
|
+
"""Check if node is a simple container pattern (decorator, object, etc.).
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
node: AST node to check
|
|
345
|
+
contains: Whether node contains the range
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
True if node matches simple container pattern
|
|
349
|
+
"""
|
|
350
|
+
simple_types = (
|
|
351
|
+
"decorator",
|
|
352
|
+
"object",
|
|
353
|
+
"member_expression",
|
|
354
|
+
"as_expression",
|
|
355
|
+
"array_pattern",
|
|
356
|
+
)
|
|
357
|
+
return node.type in simple_types and contains
|
|
358
|
+
|
|
359
|
+
def _matches_call_expression_pattern(
|
|
360
|
+
self, node: Node, ts_start: int, ts_end: int, contains: bool
|
|
361
|
+
) -> bool:
|
|
362
|
+
"""Check if node is a call expression pattern.
|
|
363
|
+
|
|
364
|
+
Args:
|
|
365
|
+
node: AST node to check
|
|
366
|
+
ts_start: Starting line (0-indexed)
|
|
367
|
+
ts_end: Ending line (0-indexed)
|
|
368
|
+
contains: Whether node contains the range
|
|
369
|
+
|
|
370
|
+
Returns:
|
|
371
|
+
True if node matches call expression pattern
|
|
372
|
+
"""
|
|
373
|
+
if node.type != "call_expression":
|
|
374
|
+
return False
|
|
375
|
+
|
|
376
|
+
# Check if this is a multi-line call containing the range
|
|
377
|
+
node_start = node.start_point[0]
|
|
378
|
+
node_end = node.end_point[0]
|
|
379
|
+
is_multiline = node_start < node_end
|
|
380
|
+
if is_multiline and node_start <= ts_start <= node_end:
|
|
381
|
+
return True
|
|
382
|
+
|
|
383
|
+
return contains
|
|
384
|
+
|
|
385
|
+
    def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
        """Check if node is a lexical declaration pattern.

        Args:
            node: AST node to check
            contains: Whether the node fully contains the range

        Returns:
            True if node matches declaration pattern (excluding function bodies)
        """
        if node.type != "lexical_declaration" or not contains:
            return False

        # Only filter simple value assignments, NOT function bodies:
        # `const f = () => { ... }` bodies are real code worth duplicate-checking.
        if self._contains_function_body(node):
            return False

        return True
|
404
|
+
def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
|
|
405
|
+
"""Check if node is a JSX element pattern.
|
|
406
|
+
|
|
407
|
+
Args:
|
|
408
|
+
node: AST node to check
|
|
409
|
+
contains: Whether node contains the range
|
|
410
|
+
|
|
411
|
+
Returns:
|
|
412
|
+
True if node matches JSX pattern
|
|
413
|
+
"""
|
|
414
|
+
jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
|
|
415
|
+
return node.type in jsx_types and contains
|
|
416
|
+
|
|
417
|
+
    def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
        """Check if node is a class body field definition pattern.

        Field declarations before the first method tend to repeat across
        classes (boilerplate), so ranges confined to that area are filtered.

        Args:
            node: AST node to check
            ts_start: Starting line (0-indexed)
            ts_end: Ending line (0-indexed)

        Returns:
            True if node is a class body whose field area contains the range.
        """
        if node.type != "class_body":
            return False

        return self._is_in_class_field_area(node, ts_start, ts_end)
|
433
|
+
def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
|
|
434
|
+
"""Check if an AST node represents a single-statement pattern to filter.
|
|
435
|
+
|
|
436
|
+
Delegates to specialized pattern matchers for different AST node categories.
|
|
437
|
+
|
|
438
|
+
Args:
|
|
439
|
+
node: AST node that overlaps with the line range
|
|
440
|
+
ts_start: Starting line number (0-indexed)
|
|
441
|
+
ts_end: Ending line number (0-indexed)
|
|
442
|
+
|
|
443
|
+
Returns:
|
|
444
|
+
True if this node represents a single logical statement pattern
|
|
445
|
+
"""
|
|
446
|
+
node_start = node.start_point[0]
|
|
447
|
+
node_end = node.end_point[0]
|
|
448
|
+
contains = (node_start <= ts_start) and (node_end >= ts_end)
|
|
449
|
+
|
|
450
|
+
# Check pattern categories using specialized helpers - use list for any()
|
|
451
|
+
matchers = [
|
|
452
|
+
self._matches_simple_container_pattern(node, contains),
|
|
453
|
+
self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
|
|
454
|
+
self._matches_declaration_pattern(node, contains),
|
|
455
|
+
self._matches_jsx_pattern(node, contains),
|
|
456
|
+
self._matches_class_body_pattern(node, ts_start, ts_end),
|
|
457
|
+
]
|
|
458
|
+
return any(matchers)
|
|
459
|
+
|
|
460
|
+
def _contains_function_body(self, node: Node) -> bool:
|
|
461
|
+
"""Check if node contains an arrow function or function expression.
|
|
462
|
+
|
|
463
|
+
Args:
|
|
464
|
+
node: Node to check
|
|
465
|
+
|
|
466
|
+
Returns:
|
|
467
|
+
True if node contains a function with a body
|
|
468
|
+
"""
|
|
469
|
+
for child in node.children:
|
|
470
|
+
if child.type in ("arrow_function", "function", "function_expression"):
|
|
471
|
+
return True
|
|
472
|
+
if self._contains_function_body(child):
|
|
473
|
+
return True
|
|
474
|
+
return False
|
|
475
|
+
|
|
476
|
+
def _find_first_method_line(self, class_body: Node) -> int | None:
|
|
477
|
+
"""Find line number of first method in class body.
|
|
478
|
+
|
|
479
|
+
Args:
|
|
480
|
+
class_body: Class body node
|
|
481
|
+
|
|
482
|
+
Returns:
|
|
483
|
+
Line number of first method or None if no methods
|
|
484
|
+
"""
|
|
485
|
+
for child in class_body.children:
|
|
486
|
+
if child.type in ("method_definition", "function_declaration"):
|
|
487
|
+
return child.start_point[0]
|
|
488
|
+
return None
|
|
489
|
+
|
|
490
|
+
def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
|
|
491
|
+
"""Check if range is in class field definition area (before methods).
|
|
492
|
+
|
|
493
|
+
Args:
|
|
494
|
+
class_body: Class body node
|
|
495
|
+
ts_start: Starting line (0-indexed)
|
|
496
|
+
ts_end: Ending line (0-indexed)
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
True if range is in field area
|
|
500
|
+
"""
|
|
501
|
+
first_method_line = self._find_first_method_line(class_body)
|
|
502
|
+
class_start = class_body.start_point[0]
|
|
503
|
+
class_end = class_body.end_point[0]
|
|
504
|
+
|
|
505
|
+
# No methods: check if range is in class body
|
|
506
|
+
if first_method_line is None:
|
|
507
|
+
return class_start <= ts_start and class_end >= ts_end
|
|
508
|
+
|
|
509
|
+
# Has methods: check if range is before first method
|
|
510
|
+
return class_start <= ts_start and ts_end < first_method_line
|
|
511
|
+
|
|
512
|
+
def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
|
|
513
|
+
"""Find line ranges of interface/type definitions.
|
|
514
|
+
|
|
515
|
+
Args:
|
|
516
|
+
content: File content
|
|
517
|
+
|
|
518
|
+
Returns:
|
|
519
|
+
List of (start_line, end_line) tuples for interface blocks
|
|
520
|
+
"""
|
|
521
|
+
ranges: list[tuple[int, int]] = []
|
|
522
|
+
lines = content.split("\n")
|
|
523
|
+
state = {"in_interface": False, "start_line": 0, "brace_count": 0}
|
|
524
|
+
|
|
525
|
+
for i, line in enumerate(lines, start=1):
|
|
526
|
+
stripped = line.strip()
|
|
527
|
+
self._process_line_for_interface(stripped, i, state, ranges)
|
|
528
|
+
|
|
529
|
+
return ranges
|
|
530
|
+
|
|
531
|
+
    def _process_line_for_interface(
        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
    ) -> None:
        """Process a single line for interface detection.

        Mutates *state* and *ranges* in place; called once per source line.

        Args:
            stripped: Stripped line content
            line_num: Line number (1-indexed)
            state: Tracking state (in_interface, start_line, brace_count)
            ranges: Accumulated interface ranges
        """
        if self._is_interface_start(stripped):
            # NOTE(review): an interface-looking line while already tracking one
            # restarts the state; assumes definitions are not nested — confirm.
            self._handle_interface_start(stripped, line_num, state, ranges)
            return

        if state["in_interface"]:
            self._handle_interface_continuation(stripped, line_num, state, ranges)
|
549
|
+
def _is_interface_start(self, stripped: str) -> bool:
|
|
550
|
+
"""Check if line starts interface/type definition."""
|
|
551
|
+
return stripped.startswith(("interface ", "type ")) and "{" in stripped
|
|
552
|
+
|
|
553
|
+
def _handle_interface_start(
|
|
554
|
+
self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
|
|
555
|
+
) -> None:
|
|
556
|
+
"""Handle start of interface definition."""
|
|
557
|
+
state["in_interface"] = True
|
|
558
|
+
state["start_line"] = line_num
|
|
559
|
+
state["brace_count"] = stripped.count("{") - stripped.count("}")
|
|
560
|
+
|
|
561
|
+
if state["brace_count"] == 0: # Single-line interface
|
|
562
|
+
ranges.append((line_num, line_num))
|
|
563
|
+
state["in_interface"] = False
|
|
564
|
+
|
|
565
|
+
def _handle_interface_continuation(
|
|
566
|
+
self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
|
|
567
|
+
) -> None:
|
|
568
|
+
"""Handle continuation of interface definition."""
|
|
569
|
+
state["brace_count"] += stripped.count("{") - stripped.count("}")
|
|
570
|
+
if state["brace_count"] == 0:
|
|
571
|
+
ranges.append((state["start_line"], line_num))
|
|
572
|
+
state["in_interface"] = False
|
|
573
|
+
|
|
574
|
+
def _overlaps_interface(
|
|
575
|
+
self, start: int, end: int, interface_ranges: list[tuple[int, int]]
|
|
576
|
+
) -> bool:
|
|
577
|
+
"""Check if block overlaps with any interface range.
|
|
578
|
+
|
|
579
|
+
Args:
|
|
580
|
+
start: Block start line
|
|
581
|
+
end: Block end line
|
|
582
|
+
interface_ranges: List of interface definition ranges
|
|
583
|
+
|
|
584
|
+
Returns:
|
|
585
|
+
True if block overlaps with an interface
|
|
586
|
+
"""
|
|
587
|
+
for if_start, if_end in interface_ranges:
|
|
588
|
+
if start <= if_end and end >= if_start:
|
|
589
|
+
return True
|
|
590
|
+
return False
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Violation message formatting for duplicate code detection
|
|
3
|
+
|
|
4
|
+
Scope: Construction of detailed violation messages with cross-file references
|
|
5
|
+
|
|
6
|
+
Overview: Builds comprehensive violation messages for duplicate code blocks. Formats messages to
|
|
7
|
+
include duplicate line count, occurrence count, and references to all other locations where
|
|
8
|
+
the duplicate appears. Provides helpful context for developers to identify and refactor
|
|
9
|
+
duplicated code. Follows established violation building pattern from nesting and SRP linters.
|
|
10
|
+
|
|
11
|
+
Dependencies: CodeBlock, Violation from core.types
|
|
12
|
+
|
|
13
|
+
Exports: DRYViolationBuilder class
|
|
14
|
+
|
|
15
|
+
Interfaces: DRYViolationBuilder.build_violation(block: CodeBlock, all_duplicates: list[CodeBlock],
|
|
16
|
+
rule_id: str) -> Violation
|
|
17
|
+
|
|
18
|
+
Implementation: Message formatting with cross-references, location tracking, multi-file duplicate
|
|
19
|
+
reporting
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from src.core.types import Severity, Violation
|
|
23
|
+
|
|
24
|
+
from .cache import CodeBlock
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DRYViolationBuilder:
    """Builds violation messages for duplicate code."""

    def build_violation(
        self, block: CodeBlock, all_duplicates: list[CodeBlock], rule_id: str
    ) -> Violation:
        """Build violation for duplicate code block.

        Args:
            block: The code block in current file
            all_duplicates: All blocks with same hash (including current)
            rule_id: Rule identifier

        Returns:
            Violation instance with formatted message
        """
        span = block.end_line - block.start_line + 1
        refs = self._get_location_refs(block, all_duplicates)
        message = self._build_message(span, len(all_duplicates), refs)

        return Violation(
            rule_id=rule_id,
            message=message,
            file_path=str(block.file_path),
            line=block.start_line,
            column=1,
            severity=Severity.ERROR,
        )

    def _get_location_refs(self, block: CodeBlock, all_duplicates: list[CodeBlock]) -> list[str]:
        """Format "path:start-end" references for every duplicate except the block itself."""
        refs = []
        for dup in all_duplicates:
            same_spot = dup.file_path == block.file_path and dup.start_line == block.start_line
            if not same_spot:
                refs.append(f"{dup.file_path}:{dup.start_line}-{dup.end_line}")
        return refs

    def _build_message(self, line_count: int, occurrence_count: int, locations: list[str]) -> str:
        """Build the violation message, appending cross-file references when present."""
        base = f"Duplicate code ({line_count} lines, {occurrence_count} occurrences)"
        if not locations:
            return base
        return base + f". Also found in: {', '.join(locations)}"