thailint 0.1.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. src/__init__.py +7 -2
  2. src/analyzers/__init__.py +23 -0
  3. src/analyzers/typescript_base.py +148 -0
  4. src/api.py +1 -1
  5. src/cli.py +1111 -144
  6. src/config.py +12 -33
  7. src/core/base.py +102 -5
  8. src/core/cli_utils.py +206 -0
  9. src/core/config_parser.py +126 -0
  10. src/core/linter_utils.py +168 -0
  11. src/core/registry.py +17 -92
  12. src/core/rule_discovery.py +132 -0
  13. src/core/violation_builder.py +122 -0
  14. src/linter_config/ignore.py +112 -40
  15. src/linter_config/loader.py +3 -13
  16. src/linters/dry/__init__.py +23 -0
  17. src/linters/dry/base_token_analyzer.py +76 -0
  18. src/linters/dry/block_filter.py +265 -0
  19. src/linters/dry/block_grouper.py +59 -0
  20. src/linters/dry/cache.py +172 -0
  21. src/linters/dry/cache_query.py +61 -0
  22. src/linters/dry/config.py +134 -0
  23. src/linters/dry/config_loader.py +44 -0
  24. src/linters/dry/deduplicator.py +120 -0
  25. src/linters/dry/duplicate_storage.py +63 -0
  26. src/linters/dry/file_analyzer.py +90 -0
  27. src/linters/dry/inline_ignore.py +140 -0
  28. src/linters/dry/linter.py +163 -0
  29. src/linters/dry/python_analyzer.py +668 -0
  30. src/linters/dry/storage_initializer.py +42 -0
  31. src/linters/dry/token_hasher.py +169 -0
  32. src/linters/dry/typescript_analyzer.py +592 -0
  33. src/linters/dry/violation_builder.py +74 -0
  34. src/linters/dry/violation_filter.py +94 -0
  35. src/linters/dry/violation_generator.py +174 -0
  36. src/linters/file_header/__init__.py +24 -0
  37. src/linters/file_header/atemporal_detector.py +87 -0
  38. src/linters/file_header/config.py +66 -0
  39. src/linters/file_header/field_validator.py +69 -0
  40. src/linters/file_header/linter.py +313 -0
  41. src/linters/file_header/python_parser.py +86 -0
  42. src/linters/file_header/violation_builder.py +78 -0
  43. src/linters/file_placement/config_loader.py +86 -0
  44. src/linters/file_placement/directory_matcher.py +80 -0
  45. src/linters/file_placement/linter.py +262 -471
  46. src/linters/file_placement/path_resolver.py +61 -0
  47. src/linters/file_placement/pattern_matcher.py +55 -0
  48. src/linters/file_placement/pattern_validator.py +106 -0
  49. src/linters/file_placement/rule_checker.py +229 -0
  50. src/linters/file_placement/violation_factory.py +177 -0
  51. src/linters/magic_numbers/__init__.py +48 -0
  52. src/linters/magic_numbers/config.py +82 -0
  53. src/linters/magic_numbers/context_analyzer.py +247 -0
  54. src/linters/magic_numbers/linter.py +516 -0
  55. src/linters/magic_numbers/python_analyzer.py +76 -0
  56. src/linters/magic_numbers/typescript_analyzer.py +218 -0
  57. src/linters/magic_numbers/violation_builder.py +98 -0
  58. src/linters/nesting/__init__.py +6 -2
  59. src/linters/nesting/config.py +17 -4
  60. src/linters/nesting/linter.py +81 -168
  61. src/linters/nesting/typescript_analyzer.py +39 -102
  62. src/linters/nesting/typescript_function_extractor.py +130 -0
  63. src/linters/nesting/violation_builder.py +139 -0
  64. src/linters/print_statements/__init__.py +53 -0
  65. src/linters/print_statements/config.py +83 -0
  66. src/linters/print_statements/linter.py +430 -0
  67. src/linters/print_statements/python_analyzer.py +155 -0
  68. src/linters/print_statements/typescript_analyzer.py +135 -0
  69. src/linters/print_statements/violation_builder.py +98 -0
  70. src/linters/srp/__init__.py +99 -0
  71. src/linters/srp/class_analyzer.py +113 -0
  72. src/linters/srp/config.py +82 -0
  73. src/linters/srp/heuristics.py +89 -0
  74. src/linters/srp/linter.py +234 -0
  75. src/linters/srp/metrics_evaluator.py +47 -0
  76. src/linters/srp/python_analyzer.py +72 -0
  77. src/linters/srp/typescript_analyzer.py +75 -0
  78. src/linters/srp/typescript_metrics_calculator.py +90 -0
  79. src/linters/srp/violation_builder.py +117 -0
  80. src/orchestrator/core.py +54 -9
  81. src/templates/thailint_config_template.yaml +158 -0
  82. src/utils/__init__.py +4 -0
  83. src/utils/project_root.py +203 -0
  84. thailint-0.5.0.dist-info/METADATA +1286 -0
  85. thailint-0.5.0.dist-info/RECORD +96 -0
  86. {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info}/WHEEL +1 -1
  87. src/.ai/layout.yaml +0 -48
  88. thailint-0.1.5.dist-info/METADATA +0 -629
  89. thailint-0.1.5.dist-info/RECORD +0 -28
  90. {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info}/entry_points.txt +0 -0
  91. {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,592 @@
1
+ """
2
+ Purpose: TypeScript/JavaScript source code tokenization and duplicate block analysis
3
+
4
+ Scope: TypeScript and JavaScript file analysis for duplicate detection
5
+
6
+ Overview: Analyzes TypeScript and JavaScript source files to extract code blocks for duplicate
7
+ detection. Inherits from BaseTokenAnalyzer for common token-based hashing and rolling hash
8
+ window logic. Adds TypeScript-specific filtering to exclude JSDoc comments, single statements
9
+ (decorators, function calls, object literals, class fields), and interface/type definitions.
10
+ Uses tree-sitter for AST-based filtering to achieve same sophistication as Python analyzer.
11
+
12
+ Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, tree-sitter
13
+
14
+ Exports: TypeScriptDuplicateAnalyzer class
15
+
16
+ Interfaces: TypeScriptDuplicateAnalyzer.analyze(file_path: Path, content: str, config: DRYConfig)
17
+ -> list[CodeBlock]
18
+
19
+ Implementation: Inherits analyze() workflow from BaseTokenAnalyzer, adds JSDoc comment extraction,
20
+ single statement detection using tree-sitter AST patterns, and interface filtering logic
21
+
22
+ SRP Exception: TypeScriptDuplicateAnalyzer has 20 methods and 324 lines (exceeds max 8 methods/200 lines)
23
+ Justification: Complex tree-sitter AST analysis algorithm for duplicate code detection with sophisticated
24
+ false positive filtering. Mirrors Python analyzer structure. Methods form tightly coupled algorithm
25
+ pipeline: JSDoc extraction, tokenization with line tracking, single-statement pattern detection across
26
+ 10+ AST node types (decorators, call_expression, object, class_body, member_expression, as_expression,
27
+ jsx elements, array_pattern), and context-aware filtering. Similar to parser or compiler pass architecture
28
+ where algorithmic cohesion is critical. Splitting would fragment the algorithm logic and make maintenance
29
+ harder by separating interdependent tree-sitter AST analysis steps. All methods contribute to single
30
+ responsibility: accurately detecting duplicate TypeScript/JavaScript code while minimizing false positives.
31
+ """
32
+
33
+ from collections.abc import Generator
34
+ from pathlib import Path
35
+
36
+ from src.analyzers.typescript_base import TREE_SITTER_AVAILABLE
37
+
38
+ from .base_token_analyzer import BaseTokenAnalyzer
39
+ from .block_filter import BlockFilterRegistry, create_default_registry
40
+ from .cache import CodeBlock
41
+ from .config import DRYConfig
42
+
43
+ if TREE_SITTER_AVAILABLE:
44
+ from tree_sitter import Node
45
+ else:
46
+ Node = None # type: ignore[assignment,misc] # pylint: disable=invalid-name
47
+
48
+
49
class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violation]
    """Analyzes TypeScript/JavaScript code for duplicate blocks.

    SRP suppression: Complex tree-sitter AST analysis algorithm requires 20 methods to implement
    sophisticated duplicate detection with false positive filtering. See file header for justification.
    """

    def __init__(self, filter_registry: BlockFilterRegistry | None = None):
        """Initialize analyzer with optional custom filter registry.

        Args:
            filter_registry: Custom filter registry (uses defaults if None)
        """
        super().__init__()
        self._filter_registry = filter_registry or create_default_registry()

    def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
        """Analyze TypeScript/JavaScript file for duplicate code blocks.

        Filters out JSDoc comments, single statements, and interface definitions.

        Args:
            file_path: Path to source file
            content: File content
            config: DRY configuration

        Returns:
            List of CodeBlock instances with hash values
        """
        # Parse the file ONCE and reuse the tree for both JSDoc detection and the
        # per-window single-statement check. Previously each window re-parsed the
        # whole file (and recomputed interface ranges), making analysis
        # accidentally quadratic in file size.
        root = self._parse_root(content)

        jsdoc_ranges: set[int] = set()
        if root is not None:
            self._collect_jsdoc_lines_recursive(root, jsdoc_ranges)

        # Tokenize with line number tracking, skipping JSDoc lines
        lines_with_numbers = self._tokenize_with_line_numbers(content, jsdoc_ranges)

        # Generate rolling hash windows
        windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)

        # Interface/type ranges are a per-file invariant; compute once, not per window.
        interface_ranges = self._find_interface_ranges(content)

        blocks = []
        for hash_val, start_line, end_line, snippet in windows:
            # Filter blocks overlapping interface/type definitions
            if self._overlaps_interface(start_line, end_line, interface_ranges):
                continue

            # Filter single statement patterns (decorators, call args, object
            # literals, class fields, JSX, ...). No tree => no filtering,
            # matching the original TREE_SITTER_AVAILABLE fallback.
            if root is not None and self._check_overlapping_nodes(root, start_line, end_line):
                continue

            block = CodeBlock(
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                snippet=snippet,
                hash_value=hash_val,
            )

            # Apply extensible filters (keyword arguments, imports, etc.)
            if self._filter_registry.should_filter_block(block, content):
                continue

            blocks.append(block)

        return blocks

    def _parse_root(self, content: str) -> "Node | None":
        """Parse source once and return the tree-sitter root node.

        Args:
            content: TypeScript/JavaScript source code

        Returns:
            Root node, or None when tree-sitter is unavailable or parsing fails
        """
        if not TREE_SITTER_AVAILABLE:
            return None

        # Local import keeps tree-sitter optional at module import time.
        from src.analyzers.typescript_base import TypeScriptBaseAnalyzer

        analyzer = TypeScriptBaseAnalyzer()
        return analyzer.parse_typescript(content) or None

    def _get_jsdoc_ranges_from_content(self, content: str) -> set[int]:
        """Extract line numbers that are part of JSDoc comments.

        Args:
            content: TypeScript/JavaScript source code

        Returns:
            Set of line numbers (1-indexed) that are part of JSDoc comments
        """
        root = self._parse_root(content)
        if root is None:
            return set()

        jsdoc_lines: set[int] = set()
        self._collect_jsdoc_lines_recursive(root, jsdoc_lines)
        return jsdoc_lines

    def _is_jsdoc_comment(self, node: Node) -> bool:
        """Check if node is a JSDoc comment.

        Args:
            node: Tree-sitter node to check

        Returns:
            True if node is JSDoc comment (/** ... */)
        """
        if node.type != "comment":
            return False

        text = node.text.decode() if node.text else ""
        return text.startswith("/**")

    def _add_comment_lines_to_set(self, node: Node, jsdoc_lines: set[int]) -> None:
        """Add comment node's line range to set.

        Args:
            node: Comment node
            jsdoc_lines: Set to add line numbers to
        """
        # tree-sitter points are 0-indexed; convert to 1-indexed source lines.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        for line_num in range(start_line, end_line + 1):
            jsdoc_lines.add(line_num)

    def _collect_jsdoc_lines_recursive(self, node: Node, jsdoc_lines: set[int]) -> None:
        """Recursively collect JSDoc comment line ranges.

        Args:
            node: Tree-sitter node to examine
            jsdoc_lines: Set to accumulate line numbers
        """
        if self._is_jsdoc_comment(node):
            self._add_comment_lines_to_set(node, jsdoc_lines)

        for child in node.children:
            self._collect_jsdoc_lines_recursive(child, jsdoc_lines)

    def _tokenize_with_line_numbers(
        self, content: str, jsdoc_lines: set[int]
    ) -> list[tuple[int, str]]:
        """Tokenize code while tracking original line numbers and skipping JSDoc.

        Args:
            content: Source code
            jsdoc_lines: Set of line numbers that are JSDoc comments

        Returns:
            List of (original_line_number, normalized_code) tuples
        """
        lines_with_numbers = []
        in_multiline_import = False

        for line_num, line in enumerate(content.split("\n"), start=1):
            # Skip JSDoc comment lines
            if line_num in jsdoc_lines:
                continue

            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
            if not line:
                continue

            # Update multi-line import state and check if line should be skipped
            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
                line, in_multiline_import
            )
            if should_skip:
                continue

            lines_with_numbers.append((line_num, line))

        return lines_with_numbers

    def _rolling_hash_with_tracking(
        self, lines_with_numbers: list[tuple[int, str]], window_size: int
    ) -> list[tuple[int, int, int, str]]:
        """Create rolling hash windows while preserving original line numbers.

        Args:
            lines_with_numbers: List of (line_number, code) tuples
            window_size: Number of lines per window

        Returns:
            List of (hash_value, start_line, end_line, snippet) tuples
        """
        if len(lines_with_numbers) < window_size:
            return []

        hashes = []
        for i in range(len(lines_with_numbers) - window_size + 1):
            window = lines_with_numbers[i : i + window_size]

            # Extract just the code for hashing.
            # NOTE(review): builtin hash() on str is salted per process
            # (PYTHONHASHSEED), so these values are stable within a run but NOT
            # across runs — verify the cache layer never persists them between
            # processes, or switch to a stable digest (e.g. hashlib).
            code_lines = [code for _, code in window]
            snippet = "\n".join(code_lines)
            hash_val = hash(snippet)

            # Get original line numbers
            start_line = window[0][0]
            end_line = window[-1][0]

            hashes.append((hash_val, start_line, end_line, snippet))

        return hashes

    def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
        """Filter out blocks that overlap with interface/type definitions.

        Retained for backward compatibility; analyze() now precomputes interface
        ranges once per file instead of calling this per window.

        Args:
            content: File content
            start_line: Block start line
            end_line: Block end line

        Returns:
            False if block overlaps interface definition, True otherwise
        """
        interface_ranges = self._find_interface_ranges(content)
        return not self._overlaps_interface(start_line, end_line, interface_ranges)

    def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
        """Check if a line range in the original source is a single logical statement.

        Uses tree-sitter AST analysis to detect patterns like:
        - Decorators (@Component(...))
        - Function call arguments
        - Object literal properties
        - Class field definitions
        - Type assertions
        - Chained method calls (single expression)

        Retained for backward compatibility; analyze() now parses once per file
        and calls _check_overlapping_nodes() directly per window.

        Args:
            content: TypeScript source code
            start_line: Starting line number (1-indexed)
            end_line: Ending line number (1-indexed)

        Returns:
            True if this range represents a single logical statement/expression
        """
        root = self._parse_root(content)
        if root is None:
            return False

        return self._check_overlapping_nodes(root, start_line, end_line)

    def _check_overlapping_nodes(self, root: Node, start_line: int, end_line: int) -> bool:
        """Check if any AST node overlaps and matches single-statement pattern.

        Args:
            root: Root tree-sitter node
            start_line: Starting line number (1-indexed)
            end_line: Ending line number (1-indexed)

        Returns:
            True if any node matches single-statement pattern
        """
        # Convert to 0-indexed for tree-sitter
        ts_start = start_line - 1
        ts_end = end_line - 1

        for node in self._walk_nodes(root):
            if self._node_overlaps_and_matches(node, ts_start, ts_end):
                return True
        return False

    def _walk_nodes(self, node: Node) -> Generator[Node, None, None]:
        """Generator to walk all nodes in tree (pre-order).

        Args:
            node: Starting node

        Yields:
            All nodes in tree
        """
        yield node
        for child in node.children:
            yield from self._walk_nodes(child)

    def _node_overlaps_and_matches(self, node: Node, ts_start: int, ts_end: int) -> bool:
        """Check if node overlaps with range and matches single-statement pattern.

        Args:
            node: Tree-sitter node
            ts_start: Starting line (0-indexed)
            ts_end: Ending line (0-indexed)

        Returns:
            True if node overlaps and matches pattern
        """
        node_start = node.start_point[0]
        node_end = node.end_point[0]

        # Check if ranges overlap
        overlaps = not (node_end < ts_start or node_start > ts_end)
        if not overlaps:
            return False

        return self._is_single_statement_pattern(node, ts_start, ts_end)

    def _matches_simple_container_pattern(self, node: Node, contains: bool) -> bool:
        """Check if node is a simple container pattern (decorator, object, etc.).

        Args:
            node: AST node to check
            contains: Whether node contains the range

        Returns:
            True if node matches simple container pattern
        """
        simple_types = (
            "decorator",
            "object",
            "member_expression",
            "as_expression",
            "array_pattern",
        )
        return node.type in simple_types and contains

    def _matches_call_expression_pattern(
        self, node: Node, ts_start: int, ts_end: int, contains: bool
    ) -> bool:
        """Check if node is a call expression pattern.

        Args:
            node: AST node to check
            ts_start: Starting line (0-indexed)
            ts_end: Ending line (0-indexed)
            contains: Whether node contains the range

        Returns:
            True if node matches call expression pattern
        """
        if node.type != "call_expression":
            return False

        # Check if this is a multi-line call containing the range
        node_start = node.start_point[0]
        node_end = node.end_point[0]
        is_multiline = node_start < node_end
        if is_multiline and node_start <= ts_start <= node_end:
            return True

        return contains

    def _matches_declaration_pattern(self, node: Node, contains: bool) -> bool:
        """Check if node is a lexical declaration pattern.

        Args:
            node: AST node to check
            contains: Whether node contains the range

        Returns:
            True if node matches declaration pattern (excluding function bodies)
        """
        if node.type != "lexical_declaration" or not contains:
            return False

        # Only filter if simple value assignment, NOT a function body
        if self._contains_function_body(node):
            return False

        return True

    def _matches_jsx_pattern(self, node: Node, contains: bool) -> bool:
        """Check if node is a JSX element pattern.

        Args:
            node: AST node to check
            contains: Whether node contains the range

        Returns:
            True if node matches JSX pattern
        """
        jsx_types = ("jsx_opening_element", "jsx_self_closing_element")
        return node.type in jsx_types and contains

    def _matches_class_body_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
        """Check if node is a class body field definition pattern.

        Args:
            node: AST node to check
            ts_start: Starting line (0-indexed)
            ts_end: Ending line (0-indexed)

        Returns:
            True if node is class body with field definitions
        """
        if node.type != "class_body":
            return False

        return self._is_in_class_field_area(node, ts_start, ts_end)

    def _is_single_statement_pattern(self, node: Node, ts_start: int, ts_end: int) -> bool:
        """Check if an AST node represents a single-statement pattern to filter.

        Delegates to specialized pattern matchers for different AST node categories.

        Args:
            node: AST node that overlaps with the line range
            ts_start: Starting line number (0-indexed)
            ts_end: Ending line number (0-indexed)

        Returns:
            True if this node represents a single logical statement pattern
        """
        node_start = node.start_point[0]
        node_end = node.end_point[0]
        contains = (node_start <= ts_start) and (node_end >= ts_end)

        # Check pattern categories using specialized helpers
        matchers = [
            self._matches_simple_container_pattern(node, contains),
            self._matches_call_expression_pattern(node, ts_start, ts_end, contains),
            self._matches_declaration_pattern(node, contains),
            self._matches_jsx_pattern(node, contains),
            self._matches_class_body_pattern(node, ts_start, ts_end),
        ]
        return any(matchers)

    def _contains_function_body(self, node: Node) -> bool:
        """Check if node contains an arrow function or function expression.

        Args:
            node: Node to check

        Returns:
            True if node contains a function with a body
        """
        for child in node.children:
            if child.type in ("arrow_function", "function", "function_expression"):
                return True
            if self._contains_function_body(child):
                return True
        return False

    def _find_first_method_line(self, class_body: Node) -> int | None:
        """Find line number of first method in class body.

        Args:
            class_body: Class body node

        Returns:
            Line number of first method or None if no methods
        """
        for child in class_body.children:
            if child.type in ("method_definition", "function_declaration"):
                return child.start_point[0]
        return None

    def _is_in_class_field_area(self, class_body: Node, ts_start: int, ts_end: int) -> bool:
        """Check if range is in class field definition area (before methods).

        Args:
            class_body: Class body node
            ts_start: Starting line (0-indexed)
            ts_end: Ending line (0-indexed)

        Returns:
            True if range is in field area
        """
        first_method_line = self._find_first_method_line(class_body)
        class_start = class_body.start_point[0]
        class_end = class_body.end_point[0]

        # No methods: check if range is in class body
        if first_method_line is None:
            return class_start <= ts_start and class_end >= ts_end

        # Has methods: check if range is before first method
        return class_start <= ts_start and ts_end < first_method_line

    def _find_interface_ranges(self, content: str) -> list[tuple[int, int]]:
        """Find line ranges of interface/type definitions.

        Text-based brace matching (not AST) — adequate because interfaces are
        detected only by their `interface `/`type ` prefix on the opening line.

        Args:
            content: File content

        Returns:
            List of (start_line, end_line) tuples for interface blocks
        """
        ranges: list[tuple[int, int]] = []
        lines = content.split("\n")
        state = {"in_interface": False, "start_line": 0, "brace_count": 0}

        for i, line in enumerate(lines, start=1):
            stripped = line.strip()
            self._process_line_for_interface(stripped, i, state, ranges)

        return ranges

    def _process_line_for_interface(
        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
    ) -> None:
        """Process single line for interface detection.

        Args:
            stripped: Stripped line content
            line_num: Line number
            state: Tracking state (in_interface, start_line, brace_count)
            ranges: Accumulated interface ranges
        """
        if self._is_interface_start(stripped):
            self._handle_interface_start(stripped, line_num, state, ranges)
            return

        if state["in_interface"]:
            self._handle_interface_continuation(stripped, line_num, state, ranges)

    def _is_interface_start(self, stripped: str) -> bool:
        """Check if line starts interface/type definition."""
        return stripped.startswith(("interface ", "type ")) and "{" in stripped

    def _handle_interface_start(
        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
    ) -> None:
        """Handle start of interface definition."""
        state["in_interface"] = True
        state["start_line"] = line_num
        state["brace_count"] = stripped.count("{") - stripped.count("}")

        if state["brace_count"] == 0:  # Single-line interface
            ranges.append((line_num, line_num))
            state["in_interface"] = False

    def _handle_interface_continuation(
        self, stripped: str, line_num: int, state: dict, ranges: list[tuple[int, int]]
    ) -> None:
        """Handle continuation of interface definition."""
        state["brace_count"] += stripped.count("{") - stripped.count("}")
        if state["brace_count"] == 0:
            ranges.append((state["start_line"], line_num))
            state["in_interface"] = False

    def _overlaps_interface(
        self, start: int, end: int, interface_ranges: list[tuple[int, int]]
    ) -> bool:
        """Check if block overlaps with any interface range.

        Args:
            start: Block start line
            end: Block end line
            interface_ranges: List of interface definition ranges

        Returns:
            True if block overlaps with an interface
        """
        for if_start, if_end in interface_ranges:
            if start <= if_end and end >= if_start:
                return True
        return False
@@ -0,0 +1,74 @@
1
+ """
2
+ Purpose: Violation message formatting for duplicate code detection
3
+
4
+ Scope: Construction of detailed violation messages with cross-file references
5
+
6
+ Overview: Builds comprehensive violation messages for duplicate code blocks. Formats messages to
7
+ include duplicate line count, occurrence count, and references to all other locations where
8
+ the duplicate appears. Provides helpful context for developers to identify and refactor
9
+ duplicated code. Follows established violation building pattern from nesting and SRP linters.
10
+
11
+ Dependencies: CodeBlock, Violation from core.types
12
+
13
+ Exports: DRYViolationBuilder class
14
+
15
+ Interfaces: DRYViolationBuilder.build_violation(block: CodeBlock, all_duplicates: list[CodeBlock],
16
+ rule_id: str) -> Violation
17
+
18
+ Implementation: Message formatting with cross-references, location tracking, multi-file duplicate
19
+ reporting
20
+ """
21
+
22
+ from src.core.types import Severity, Violation
23
+
24
+ from .cache import CodeBlock
25
+
26
+
27
class DRYViolationBuilder:
    """Builds violation messages for duplicate code."""

    def build_violation(
        self, block: CodeBlock, all_duplicates: list[CodeBlock], rule_id: str
    ) -> Violation:
        """Build violation for duplicate code block.

        Args:
            block: The code block in current file
            all_duplicates: All blocks with same hash (including current)
            rule_id: Rule identifier

        Returns:
            Violation instance with formatted message
        """
        span = block.end_line - block.start_line + 1
        refs = self._get_location_refs(block, all_duplicates)
        text = self._build_message(span, len(all_duplicates), refs)

        return Violation(
            rule_id=rule_id,
            message=text,
            file_path=str(block.file_path),
            line=block.start_line,
            column=1,
            severity=Severity.ERROR,
        )

    def _get_location_refs(self, block: CodeBlock, all_duplicates: list[CodeBlock]) -> list[str]:
        """Get formatted location strings for other duplicates."""
        refs: list[str] = []
        for dup in all_duplicates:
            # Skip the block being reported itself (same file AND same start line).
            if dup.file_path == block.file_path and dup.start_line == block.start_line:
                continue
            refs.append(f"{dup.file_path}:{dup.start_line}-{dup.end_line}")
        return refs

    def _build_message(self, line_count: int, occurrence_count: int, locations: list[str]) -> str:
        """Build violation message with location references."""
        base = f"Duplicate code ({line_count} lines, {occurrence_count} occurrences)"
        if not locations:
            return base
        return base + f". Also found in: {', '.join(locations)}"