thailint 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. src/__init__.py +7 -2
  2. src/analyzers/__init__.py +23 -0
  3. src/analyzers/typescript_base.py +148 -0
  4. src/api.py +1 -1
  5. src/cli.py +498 -141
  6. src/config.py +6 -31
  7. src/core/base.py +12 -0
  8. src/core/cli_utils.py +206 -0
  9. src/core/config_parser.py +99 -0
  10. src/core/linter_utils.py +168 -0
  11. src/core/registry.py +17 -92
  12. src/core/rule_discovery.py +132 -0
  13. src/core/violation_builder.py +122 -0
  14. src/linter_config/ignore.py +112 -40
  15. src/linter_config/loader.py +3 -13
  16. src/linters/dry/__init__.py +23 -0
  17. src/linters/dry/base_token_analyzer.py +76 -0
  18. src/linters/dry/block_filter.py +262 -0
  19. src/linters/dry/block_grouper.py +59 -0
  20. src/linters/dry/cache.py +218 -0
  21. src/linters/dry/cache_query.py +61 -0
  22. src/linters/dry/config.py +130 -0
  23. src/linters/dry/config_loader.py +44 -0
  24. src/linters/dry/deduplicator.py +120 -0
  25. src/linters/dry/duplicate_storage.py +126 -0
  26. src/linters/dry/file_analyzer.py +127 -0
  27. src/linters/dry/inline_ignore.py +140 -0
  28. src/linters/dry/linter.py +170 -0
  29. src/linters/dry/python_analyzer.py +517 -0
  30. src/linters/dry/storage_initializer.py +51 -0
  31. src/linters/dry/token_hasher.py +115 -0
  32. src/linters/dry/typescript_analyzer.py +590 -0
  33. src/linters/dry/violation_builder.py +74 -0
  34. src/linters/dry/violation_filter.py +91 -0
  35. src/linters/dry/violation_generator.py +174 -0
  36. src/linters/file_placement/config_loader.py +86 -0
  37. src/linters/file_placement/directory_matcher.py +80 -0
  38. src/linters/file_placement/linter.py +252 -472
  39. src/linters/file_placement/path_resolver.py +61 -0
  40. src/linters/file_placement/pattern_matcher.py +55 -0
  41. src/linters/file_placement/pattern_validator.py +106 -0
  42. src/linters/file_placement/rule_checker.py +229 -0
  43. src/linters/file_placement/violation_factory.py +177 -0
  44. src/linters/nesting/config.py +13 -3
  45. src/linters/nesting/linter.py +76 -152
  46. src/linters/nesting/typescript_analyzer.py +38 -102
  47. src/linters/nesting/typescript_function_extractor.py +130 -0
  48. src/linters/nesting/violation_builder.py +139 -0
  49. src/linters/srp/__init__.py +99 -0
  50. src/linters/srp/class_analyzer.py +113 -0
  51. src/linters/srp/config.py +76 -0
  52. src/linters/srp/heuristics.py +89 -0
  53. src/linters/srp/linter.py +225 -0
  54. src/linters/srp/metrics_evaluator.py +47 -0
  55. src/linters/srp/python_analyzer.py +72 -0
  56. src/linters/srp/typescript_analyzer.py +75 -0
  57. src/linters/srp/typescript_metrics_calculator.py +90 -0
  58. src/linters/srp/violation_builder.py +117 -0
  59. src/orchestrator/core.py +42 -7
  60. src/utils/__init__.py +4 -0
  61. src/utils/project_root.py +84 -0
  62. {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/METADATA +414 -63
  63. thailint-0.2.0.dist-info/RECORD +75 -0
  64. src/.ai/layout.yaml +0 -48
  65. thailint-0.1.5.dist-info/RECORD +0 -28
  66. {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/LICENSE +0 -0
  67. {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/WHEEL +0 -0
  68. {thailint-0.1.5.dist-info → thailint-0.2.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,590 @@
1
+ """
2
+ Purpose: TypeScript/JavaScript source code tokenization and duplicate block analysis
3
+
4
+ Scope: TypeScript and JavaScript file analysis for duplicate detection
5
+
6
+ Overview: Analyzes TypeScript and JavaScript source files to extract code blocks for duplicate
7
+ detection. Inherits from BaseTokenAnalyzer for common token-based hashing and rolling hash
8
+ window logic. Adds TypeScript-specific filtering to exclude JSDoc comments, single statements
9
+ (decorators, function calls, object literals, class fields), and interface/type definitions.
10
+ Uses tree-sitter for AST-based filtering to achieve same sophistication as Python analyzer.
11
+
12
+ Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, tree-sitter
13
+
14
+ Exports: TypeScriptDuplicateAnalyzer class
15
+
16
+ Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, config: DRYConfig)
17
+ -> list[CodeBlock]
18
+
19
+ Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
20
+ single statement detection using tree-sitter AST patterns, and interface filtering logic
21
+
22
+ SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
23
+ Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
24
+ false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
25
+ pipeline: JSDoc extraction, tokenization with line tracking, single-statement pattern detection across
26
+ 10+ AST node types (decorators, call_expression, object, class_body, member_expression, as_expression,
27
+ jsx elements, array_pattern), and context-aware filtering. Similar to parser or compiler pass architecture
28
+ where algorithmic cohesion is critical. Splitting would fragment the algorithm logic and make maintenance
29
+ harder by separating interdependent tree-sitter AST analysis steps. All methods contribute to single
30
+ responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
31
+ """
32
+
33
+ from collections.abc import Generator
34
+ from pathlib import Path
35
+
36
+ from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
37
+
38
+ from .base_token_analyzer import BaseTokenAnalyzer
39
+ from .block_filter import BlockFilterRegistry, create_default_registry
40
+ from .cache import CodeBlock
41
+ from .config import DRYConfig
42
+
43
+ if TREE_SITTER_AVAILABLE:
44
+ from tree_sitter import Node
45
+ else:
46
+ Node = None # type: ignore[assignment,misc] # pylint: disable=invalid-name
47
+
48
+
49
+ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violation]
50
+ """Analyzes TypeScript/JavaScript code for duplicate blocks.
51
+
52
+ SRP suppression: Complex tree-sitter AST analysis algorithm requires 20 methods to implement
53
+ sophisticated duplicate detection with false positive filtering. See file header for justification.
54
+ """
55
+
56
+ def __init__(self, filter_registry: BlockFilterRegistry | None = None):
57
+ """Initialize analyzer with optional custom filter registry.
58
+
59
+ Args:
60
+ filter_registry: Custom filter registry (uses defaults if None)
61
+ """
62
+ super().__init__()
63
+ self._filter_registry = filter_registry or create_default_registry()
64
+
65
+ def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
66
+ """Analyze TypeScript/JavaScript file for duplicate code blocks.
67
+
68
+ Filters out JSDoc comments, single statements, and interface definitions.
69
+
70
+ Args:
71
+ file_path: Path to source file
72
+ content: File content
73
+ config: DRY configuration
74
+
75
+ Returns:
76
+ List of CodeBlock instances with hash values
77
+ """
78
+ # Get JSDoc comment line ranges
79
+ jsdoc_ranges = self._get_jsdoc_ranges_from_content(content)
80
+
81
+ # Tokenize with line number tracking, skipping JSDoc lines
82
+ lines_with_numbers = self._tokenize_with_line_numbers(content, jsdoc_ranges)
83
+
84
+ # Generate rolling hash windows
85
+ windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
86
+
87
+ blocks = []
88
+ for hash_val, start_line, end_line, snippet in windows:
89
+ # Filter interface/type definitions
90
+ if not self._should_include_block(content, start_line, end_line):
91
+ continue
92
+
93
+ # Filter single statement patterns
94
+ if self._is_single_statement_in_source(content, start_line, end_line):
95
+ continue
96
+
97
+ block = CodeBlock(
98
+ file_path=file_path,
99
+ start_line=start_line,
100
+ end_line=end_line,
101
+ snippet=snippet,
102
+ hash_value=hash_val,
103
+ )
104
+
105
+ # Apply extensible filters (keyword arguments, imports, etc.)
106
+ if self._filter_registry.should_filter_block(block, content):
107
+ continue
108
+
109
+ blocks.append(block)
110
+
111
+ return blocks
112
+
113
+ def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
114
+ """Extract line numbers that are part of JSDoc comments.
115
+
116
+ Args:
117
+ content: TypeScript/JavaScript source code
118
+
119
+ Returns:
120
+ Set of line numbers (1-indexed) that are part of JSDoc comments
121
+ """
122
+ if not TREE_SITTER_AVAILABLE:
123
+ return set()
124
+
125
+ from src.analyzers.typescript_base import TypeScriptBaseAnalyzer
126
+
127
+ analyzer = TypeScriptBaseAnalyzer()
128
+ root = analyzer.parse_typescript(content)
129
+ if not root:
130
+ return set()
131
+
132
+ jsdoc_lines: set[int] = set()
133
+ self._collect_jsdoc_lines_recursive(root, jsdoc_lines)
134
+ return jsdoc_lines
135
+
136
+ def _is_jsdoc_comment(self, node: Node) -> bool:
137
+ """Check if node is a JSDoc comment.
138
+
139
+ Args:
140
+ node: Tree-sitter node to check
141
+
142
+ Returns:
143
+ True if node is JSDoc comment (/** ... */)
144
+ """
145
+ if node.type != "comment":
146
+ return False
147
+
148
+ text = node.text.decode() if node.text else ""
149
+ return text.startswith("/**")
150
+
151
+ def _add_comment_lines_to_set(self, node: Node, jsdoc_lines: set[int]) -> None:
152
+ """Add comment node's line range to set.
153
+
154
+ Args:
155
+ node: Comment node
156
+ jsdoc_lines: Set to add line numbers to
157
+ """
158
+ start_line = node.start_point[0] + 1
159
+ end_line = node.end_point[0] + 1
160
+ for line_num in range(start_line, end_line + 1):
161
+ jsdoc_lines.add(line_num)
162
+
163
+ def _collect_jsdoc_lines_recursive(self, node: Node, jsdoc_lines: set[int]) -> None:
164
+ """Recursively collect JSDoc comment line ranges.
165
+
166
+ Args:
167
+ node: Tree-sitter node to examine
168
+ jsdoc_lines: Set to accumulate line numbers
169
+ """
170
+ if self._is_jsdoc_comment(node):
171
+ self._add_comment_lines_to_set(node, jsdoc_lines)
172
+
173
+ for child in node.children:
174
+ self._collect_jsdoc_lines_recursive(child, jsdoc_lines)
175
+
176
+ def _tokenize_with_line_numbers(
177
+ self, content: str, jsdoc_lines: set[int]
178
+ ) -> list[tuple[int, str]]:
179
+ """Tokenize code while tracking original line numbers and skipping JSDoc.
180
+
181
+ Args:
182
+ content: Source code
183
+ jsdoc_lines: Set of line numbers that are JSDoc comments
184
+
185
+ Returns:
186
+ List of (original_line_number, normalized_code) tuples
187
+ """
188
+ lines_with_numbers = []
189
+
190
+ for line_num, line in enumerate(content.split("\n"), start=1):
191
+ # Skip JSDoc comment lines
192
+ if line_num in jsdoc_lines:
193
+ continue
194
+
195
+ # Use hasher's existing tokenization logic
196
+ line = self._hasher._strip_comments(line) # pylint: disable=protected-access
197
+ line = " ".join(line.split())
198
+
199
+ if not line:
200
+ continue
201
+
202
+ if self._hasher._is_import_statement(line): # pylint: disable=protected-access
203
+ continue
204
+
205
+ lines_with_numbers.append((line_num, line))
206
+
207
+ return lines_with_numbers
208
+
209
+ def _rolling_hash_with_tracking(
210
+ self, lines_with_numbers: list[tuple[int, str]], window_size: int
211
+ ) -> list[tuple[int, int, int, str]]:
212
+ """Create rolling hash windows while preserving original line numbers.
213
+
214
+ Args:
215
+ lines_with_numbers: List of (line_number, code) tuples
216
+ window_size: Number of lines per window
217
+
218
+ Returns:
219
+ List of (hash_value, start_line, end_line, snippet) tuples
220
+ """
221
+ if len(lines_with_numbers) < window_size:
222
+ return []
223
+
224
+ hashes = []
225
+ for i in range(len(lines_with_numbers) - window_size + 1):
226
+ window = lines_with_numbers[i : i + window_size]
227
+
228
+ # Extract just the code for hashing
229
+ code_lines = [code for _, code in window]
230
+ snippet = "\n".join(code_lines)
231
+ hash_val = hash(snippet)
232
+
233
+ # Get original line numbers
234
+ start_line = window[0][0]
235
+ end_line = window[-1][0]
236
+
237
+ hashes.append((hash_val, start_line, end_line, snippet))
238
+
239
+ return hashes
240
+
241
+ def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
242
+ """Filter out blocks that overlap with interface/type definitions.
243
+
244
+ Args:
245
+ content: File content
246
+ start_line: Block start line
247
+ end_line: Block end line
248
+
249
+ Returns:
250
+ False if block overlaps interface definition, True otherwise
251
+ """
252
+ interface_ranges = self._find_interface_ranges(content)
253
+ return not self._overlaps_interface(start_line, end_line, interface_ranges)
254
+
255
+ def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
256
+ """Check if a line range in the original source is a single logical statement.
257
+
258
+ Uses tree-sitter AST analysis to detect patterns like:
259
+ - Decorators (@Component(...))
260
+ - Function call arguments
261
+ - Object literal properties
262
+ - Class field definitions
263
+ - Type assertions
264
+ - Chained method calls (single expression)
265
+
266
+ Args:
267
+ content: TypeScript source code
268
+ start_line: Starting line number (1-indexed)
269
+ end_line: Ending line number (1-indexed)
270
+
271
+ Returns:
272
+ True if this range represents a single logical statement/expression
273
+ """
274
+ if not TREE_SITTER_AVAILABLE:
275
+ return False
276
+
277
+ from src.analyzers.typescript_base import TypeScriptBaseAnalyzer
278
+
279
+ analyzer = TypeScriptBaseAnalyzer()
280
+ root = analyzer.parse_typescript(content)
281
+ if not root:
282
+ return False
283
+
284
+ return self._check_overlapping_nodes(root, start_line, end_line)
285
+
286
+ def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
287
+ """Check if any AST node overlaps and matches single-statement pattern.
288
+
289
+ Args:
290
+ root: Root tree-sitter node
291
+ start_line: Starting line number (1-indexed)
292
+ end_line: Ending line number (1-indexed)
293
+
294
+ Returns:
295
+ True if any node matches single-statement pattern
296
+ """
297
+ # Convert to 0-indexed for tree-sitter
298
+ ts_start = start_line - 1
299
+ ts_end = end_line - 1
300
+
301
+ for node in self._walk_nodes(root):
302
+ if self._node_overlaps_and_matches(node, ts_start, ts_end):
303
+ return True
304
+ return False
305
+
306
+ def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
307
+ """Generator to walk all nodes in tree.
308
+
309
+ Args:
310
+ node: Starting node
311
+
312
+ Yields:
313
+ All nodes in tree
314
+ """
315
+ yield node
316
+ for child in node.children:
317
+ yield from self._walk_nodes(child)
318
+
319
+ def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
320
+ """Check if node overlaps with range and matches single-statement pattern.
321
+
322
+ Args:
323
+ node: Tree-sitter node
324
+ ts_start: Starting line (0-indexed)
325
+ ts_end: Ending line (0-indexed)
326
+
327
+ Returns:
328
+ True if node overlaps and matches pattern
329
+ """
330
+ node_start = node.start_point[0]
331
+ node_end = node.end_point[0]
332
+
333
+ # Check if ranges overlap
334
+ overlaps = not (node_end < ts_start or node_start > ts_end)
335
+ if not overlaps:
336
+ return False
337
+
338
+ return self._is_single_statement_pattern(node, ts_start, ts_end)
339
+
340
+ def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
341
+ """Check if node is a simple container pattern (decorator, object, etc.).
342
+
343
+ Args:
344
+ node: AST node to check
345
+ contains: Whether node contains the range
346
+
347
+ Returns:
348
+ True if node matches simple container pattern
349
+ """
350
+ simple_types = (
351
+ "decorator",
352
+ "object",
353
+ "member_expression",
354
+ "as_expression",
355
+ "array_pattern",
356
+ )
357
+ return node.type in simple_types and contains
358
+
359
+ def _matches_call_expression_pattern(
360
+ self, node: Node, ts_start: int, ts_end: int, contains: bool
361
+ ) -> bool:
362
+ """Check if node is a call expression pattern.
363
+
364
+ Args:
365
+ node: AST node to check
366
+ ts_start: Starting line (0-indexed)
367
+ ts_end: Ending line (0-indexed)
368
+ contains: Whether node contains the range
369
+
370
+ Returns:
371
+ True if node matches call expression pattern
372
+ """
373
+ if node.type != "call_expression":
374
+ return False
375
+
376
+ # Check if this is a multi-line call containing the range
377
+ node_start = node.start_point[0]
378
+ node_end = node.end_point[0]
379
+ is_multiline = node_start < node_end
380
+ if is_multiline and node_start <= ts_start <= node_end:
381
+ return True
382
+
383
+ return contains
384
+
385
+ def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
386
+ """Check if node is a lexical declaration pattern.
387
+
388
+ Args:
389
+ node: AST node to check
390
+ contains: Whether node contains the range
391
+
392
+ Returns:
393
+ True if node matches declaration pattern (excluding function bodies)
394
+ """
395
+ if node.type != "lexical_declaration" or not contains:
396
+ return False
397
+
398
+ # Only filter if simple value assignment, NOT a function body
399
+ if self._contains_function_body(node):
400
+ return False
401
+
402
+ return True
403
+
404
+ def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
405
+ """Check if node is a JSX element pattern.
406
+
407
+ Args:
408
+ node: AST node to check
409
+ contains: Whether node contains the range
410
+
411
+ Returns:
412
+ True if node matches JSX pattern
413
+ """
414
+ jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
415
+ return node.type in jsx_types and contains
416
+
417
+ def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
418
+ """Check if node is a class body field definition pattern.
419
+
420
+ Args:
421
+ node: AST node to check
422
+ ts_start: Starting line (0-indexed)
423
+ ts_end: Ending line (0-indexed)
424
+
425
+ Returns:
426
+ True if node is class body with field definitions
427
+ """
428
+ if node.type != "class_body":
429
+ return False
430
+
431
+ return self._is_in_class_field_area(node, ts_start, ts_end)
432
+
433
+ def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
434
+ """Check if an AST node represents a single-statement pattern to filter.
435
+
436
+ Delegates to specialized pattern matchers for different AST node categories.
437
+
438
+ Args:
439
+ node: AST node that overlaps with the line range
440
+ ts_start: Starting line number (0-indexed)
441
+ ts_end: Ending line number (0-indexed)
442
+
443
+ Returns:
444
+ True if this node represents a single logical statement pattern
445
+ """
446
+ node_start = node.start_point[0]
447
+ node_end = node.end_point[0]
448
+ contains = (node_start <= ts_start) and (node_end >= ts_end)
449
+
450
+ # Check pattern categories using specialized helpers - use list for any()
451
+ matchers = [
452
+ self._matches_simple_container_pattern(node, contains),
453
+ self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
454
+ self._matches_declaration_pattern(node, contains),
455
+ self._matches_jsx_pattern(node, contains),
456
+ self._matches_class_body_pattern(node, ts_start, ts_end),
457
+ ]
458
+ return any(matchers)
459
+
460
+ def _contains_function_body(self, node: Node) -> bool:
461
+ """Check if node contains an arrow function or function expression.
462
+
463
+ Args:
464
+ node: Node to check
465
+
466
+ Returns:
467
+ True if node contains a function with a body
468
+ """
469
+ for child in node.children:
470
+ if child.type in ("arrow_function", "function", "function_expression"):
471
+ return True
472
+ if self._contains_function_body(child):
473
+ return True
474
+ return False
475
+
476
+ def _find_first_method_line(self, class_body: Node) -> int | None:
477
+ """Find line number of first method in class body.
478
+
479
+ Args:
480
+ class_body: Class body node
481
+
482
+ Returns:
483
+ Line number of first method or None if no methods
484
+ """
485
+ for child in class_body.children:
486
+ if child.type in ("method_definition", "function_declaration"):
487
+ return child.start_point[0]
488
+ return None
489
+
490
+ def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
491
+ """Check if range is in class field definition area (before methods).
492
+
493
+ Args:
494
+ class_body: Class body node
495
+ ts_start: Starting line (0-indexed)
496
+ ts_end: Ending line (0-indexed)
497
+
498
+ Returns:
499
+ True if range is in field area
500
+ """
501
+ first_method_line = self._find_first_method_line(class_body)
502
+ class_start = class_body.start_point[0]
503
+ class_end = class_body.end_point[0]
504
+
505
+ # No methods: check if range is in class body
506
+ if first_method_line is None:
507
+ return class_start <= ts_start and class_end >= ts_end
508
+
509
+ # Has methods: check if range is before first method
510
+ return class_start <= ts_start and ts_end < first_method_line
511
+
512
+ def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
513
+ """Find line ranges of interface/type definitions.
514
+
515
+ Args:
516
+ content: File content
517
+
518
+ Returns:
519
+ List of (start_line, end_line) tuples for interface blocks
520
+ """
521
+ ranges: list[tuple[int, int]] = []
522
+ lines = content.split("\n")
523
+ state = {"in_interface": False, "start_line": 0, "brace_count": 0}
524
+
525
+ for i, line in enumerate(lines, start=1):
526
+ stripped = line.strip()
527
+ self._process_line_for_interface(stripped, i, state, ranges)
528
+
529
+ return ranges
530
+
531
+ def _process_line_for_interface(
532
+ self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
533
+ ) -> None:
534
+ """Process single line for interface detection.
535
+
536
+ Args:
537
+ stripped: Stripped line content
538
+ line_num: Line number
539
+ state: Tracking state (in_interface, start_line, brace_count)
540
+ ranges: Accumulated interface ranges
541
+ """
542
+ if self._is_interface_start(stripped):
543
+ self._handle_interface_start(stripped, line_num, state, ranges)
544
+ return
545
+
546
+ if state["in_interface"]:
547
+ self._handle_interface_continuation(stripped, line_num, state, ranges)
548
+
549
+ def _is_interface_start(self, stripped: str) -> bool:
550
+ """Check if line starts interface/type definition."""
551
+ return stripped.startswith(("interface ", "type ")) and "{" in stripped
552
+
553
+ def _handle_interface_start(
554
+ self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
555
+ ) -> None:
556
+ """Handle start of interface definition."""
557
+ state["in_interface"] = True
558
+ state["start_line"] = line_num
559
+ state["brace_count"] = stripped.count("{") - stripped.count("}")
560
+
561
+ if state["brace_count"] == 0: # Single-line interface
562
+ ranges.append((line_num, line_num))
563
+ state["in_interface"] = False
564
+
565
+ def _handle_interface_continuation(
566
+ self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
567
+ ) -> None:
568
+ """Handle continuation of interface definition."""
569
+ state["brace_count"] += stripped.count("{") - stripped.count("}")
570
+ if state["brace_count"] == 0:
571
+ ranges.append((state["start_line"], line_num))
572
+ state["in_interface"] = False
573
+
574
+ def _overlaps_interface(
575
+ self, start: int, end: int, interface_ranges: list[tuple[int, int]]
576
+ ) -> bool:
577
+ """Check if block overlaps with any interface range.
578
+
579
+ Args:
580
+ start: Block start line
581
+ end: Block end line
582
+ interface_ranges: List of interface definition ranges
583
+
584
+ Returns:
585
+ True if block overlaps with an interface
586
+ """
587
+ for if_start, if_end in interface_ranges:
588
+ if start <= if_end and end >= if_start:
589
+ return True
590
+ return False
@@ -0,0 +1,74 @@
1
+ """
2
+ Purpose: Violation message formatting for duplicate code detection
3
+
4
+ Scope: Construction of detailed violation messages with cross-file references
5
+
6
+ Overview: Builds comprehensive violation messages for duplicate code blocks. Formats messages to
7
+ include duplicate line count, occurrence count, and references to all other locations where
8
+ the duplicate appears. Provides helpful context for developers to identify and refactor
9
+ duplicated code. Follows established violation building pattern from nesting and SRP linters.
10
+
11
+ Dependencies: CodeBlock, Violation from core.types
12
+
13
+ Exports: DRYViolationBuilder class
14
+
15
+ Interfaces: DRYViolationBuilder.build_violation(block: CodeBlock, all_duplicates: list[CodeBlock],
16
+ rule_id: str) -> Violation
17
+
18
+ Implementation: Message formatting with cross-references, location tracking, multi-file duplicate
19
+ reporting
20
+ """
21
+
22
+ from src.core.types import Severity, Violation
23
+
24
+ from .cache import CodeBlock
25
+
26
+
27
+ class DRYViolationBuilder:
28
+ """Builds violation messages for duplicate code."""
29
+
30
+ def build_violation(
31
+ self, block: CodeBlock, all_duplicates: list[CodeBlock], rule_id: str
32
+ ) -> Violation:
33
+ """Build violation for duplicate code block.
34
+
35
+ Args:
36
+ block: The code block in current file
37
+ all_duplicates: All blocks with same hash (including current)
38
+ rule_id: Rule identifier
39
+
40
+ Returns:
41
+ Violation instance with formatted message
42
+ """
43
+ line_count = block.end_line - block.start_line + 1
44
+ occurrence_count = len(all_duplicates)
45
+
46
+ # Get other locations and format message
47
+ location_refs = self._get_location_refs(block, all_duplicates)
48
+ message = self._build_message(line_count, occurrence_count, location_refs)
49
+
50
+ return Violation(
51
+ rule_id=rule_id,
52
+ message=message,
53
+ file_path=str(block.file_path),
54
+ line=block.start_line,
55
+ column=1,
56
+ severity=Severity.ERROR,
57
+ )
58
+
59
+ def _get_location_refs(self, block: CodeBlock, all_duplicates: list[CodeBlock]) -> list[str]:
60
+ """Get formatted location strings for other duplicates."""
61
+ other_blocks = [
62
+ d
63
+ for d in all_duplicates
64
+ if d.file_path != block.file_path or d.start_line != block.start_line
65
+ ]
66
+
67
+ return [f"{loc.file_path}:{loc.start_line}-{loc.end_line}" for loc in other_blocks]
68
+
69
+ def _build_message(self, line_count: int, occurrence_count: int, locations: list[str]) -> str:
70
+ """Build violation message with location references."""
71
+ message = f"Duplicate code ({line_count} lines, {occurrence_count} occurrences)"
72
+ if locations:
73
+ message += f". Also found in: {', '.join(locations)}"
74
+ return message