thailint 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. src/cli.py +646 -36
  2. src/config.py +6 -2
  3. src/core/base.py +90 -5
  4. src/core/config_parser.py +31 -4
  5. src/linters/dry/block_filter.py +5 -2
  6. src/linters/dry/cache.py +46 -92
  7. src/linters/dry/config.py +17 -13
  8. src/linters/dry/duplicate_storage.py +17 -80
  9. src/linters/dry/file_analyzer.py +11 -48
  10. src/linters/dry/linter.py +5 -12
  11. src/linters/dry/python_analyzer.py +188 -37
  12. src/linters/dry/storage_initializer.py +9 -18
  13. src/linters/dry/token_hasher.py +63 -9
  14. src/linters/dry/typescript_analyzer.py +7 -5
  15. src/linters/dry/violation_filter.py +4 -1
  16. src/linters/file_header/__init__.py +24 -0
  17. src/linters/file_header/atemporal_detector.py +87 -0
  18. src/linters/file_header/config.py +66 -0
  19. src/linters/file_header/field_validator.py +69 -0
  20. src/linters/file_header/linter.py +313 -0
  21. src/linters/file_header/python_parser.py +86 -0
  22. src/linters/file_header/violation_builder.py +78 -0
  23. src/linters/file_placement/linter.py +15 -4
  24. src/linters/magic_numbers/__init__.py +48 -0
  25. src/linters/magic_numbers/config.py +82 -0
  26. src/linters/magic_numbers/context_analyzer.py +247 -0
  27. src/linters/magic_numbers/linter.py +516 -0
  28. src/linters/magic_numbers/python_analyzer.py +76 -0
  29. src/linters/magic_numbers/typescript_analyzer.py +218 -0
  30. src/linters/magic_numbers/violation_builder.py +98 -0
  31. src/linters/nesting/__init__.py +6 -2
  32. src/linters/nesting/config.py +6 -3
  33. src/linters/nesting/linter.py +8 -19
  34. src/linters/nesting/typescript_analyzer.py +1 -0
  35. src/linters/print_statements/__init__.py +53 -0
  36. src/linters/print_statements/config.py +83 -0
  37. src/linters/print_statements/linter.py +430 -0
  38. src/linters/print_statements/python_analyzer.py +155 -0
  39. src/linters/print_statements/typescript_analyzer.py +135 -0
  40. src/linters/print_statements/violation_builder.py +98 -0
  41. src/linters/srp/__init__.py +3 -3
  42. src/linters/srp/config.py +12 -6
  43. src/linters/srp/linter.py +33 -24
  44. src/orchestrator/core.py +12 -2
  45. src/templates/thailint_config_template.yaml +158 -0
  46. src/utils/project_root.py +135 -16
  47. {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/METADATA +387 -81
  48. thailint-0.5.0.dist-info/RECORD +96 -0
  49. {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/WHEEL +1 -1
  50. thailint-0.2.0.dist-info/RECORD +0 -75
  51. {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/entry_points.txt +0 -0
  52. {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info/licenses}/LICENSE +0 -0
src/linters/dry/file_analyzer.py CHANGED
@@ -1,45 +1,32 @@
 """
 Purpose: File analysis orchestration for duplicate detection
 
-Scope: Coordinates language-specific analyzers and cache checking
+Scope: Coordinates language-specific analyzers
 
-Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript)
-and checking cache freshness. Handles cache hits by loading from cache, and cache misses by
-analyzing files. Separates file analysis orchestration from main linter rule logic to maintain
-SRP compliance.
+Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript).
+Analyzes files fresh every run - no cache loading. Separates file analysis orchestration from
+main linter rule logic to maintain SRP compliance.
 
-Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYCache, DRYConfig, CodeBlock
+Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYConfig, CodeBlock
 
 Exports: FileAnalyzer class
 
-Interfaces: FileAnalyzer.analyze_or_load(file_path, content, language, config, cache)
+Interfaces: FileAnalyzer.analyze(file_path, content, language, config)
 
-Implementation: Delegates to language-specific analyzers, checks cache freshness
+Implementation: Delegates to language-specific analyzers, always performs fresh analysis
 """
 
-from dataclasses import dataclass
 from pathlib import Path
 
 from .block_filter import BlockFilterRegistry, create_default_registry
-from .cache import CodeBlock, DRYCache
+from .cache import CodeBlock
 from .config import DRYConfig
 from .python_analyzer import PythonDuplicateAnalyzer
 from .typescript_analyzer import TypeScriptDuplicateAnalyzer
 
 
-@dataclass
-class FileAnalysisContext:
-    """Context for file analysis."""
-
-    file_path: Path
-    content: str
-    language: str
-    config: DRYConfig
-    cache: DRYCache | None
-
-
 class FileAnalyzer:
-    """Orchestrates file analysis with cache support."""
+    """Orchestrates file analysis for duplicate detection."""
 
     def __init__(self, config: DRYConfig | None = None) -> None:
         """Initialize with language-specific analyzers.
@@ -77,49 +64,25 @@ class FileAnalyzer:
 
         return registry
 
-    def analyze_or_load(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+    def analyze(
         self,
         file_path: Path,
         content: str,
         language: str,
         config: DRYConfig,
-        cache: DRYCache | None = None,
     ) -> list[CodeBlock]:
-        """Analyze file or load from cache.
+        """Analyze file for duplicate code blocks.
 
         Args:
             file_path: Path to file
             content: File content
             language: File language
             config: DRY configuration
-            cache: Optional cache instance
 
         Returns:
             List of CodeBlock instances
         """
-        # Check if file is fresh in cache
-        if cache:
-            mtime = file_path.stat().st_mtime
-            if cache.is_fresh(file_path, mtime):
-                return cache.load(file_path)
-
         # Analyze file based on language
-        return self._analyze_file(file_path, content, language, config)
-
-    def _analyze_file(
-        self, file_path: Path, content: str, language: str, config: DRYConfig
-    ) -> list[CodeBlock]:
-        """Analyze file based on language.
-
-        Args:
-            file_path: Path to file
-            content: File content
-            language: File language
-            config: DRY configuration
-
-        Returns:
-            List of CodeBlock instances
-        """
         if language == "python":
            return self._python_analyzer.analyze(file_path, content, config)
        if language in ("typescript", "javascript"):
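The removal of `analyze_or_load` and the cache parameter simplifies the caller-facing API. A minimal usage sketch of the new entry point, assuming `DRYConfig` accepts `min_duplicate_lines` as a keyword (the full constructor signature is not visible in this diff):

```python
from pathlib import Path

from src.linters.dry.config import DRYConfig
from src.linters.dry.file_analyzer import FileAnalyzer

# Assumption: DRYConfig can be built with just min_duplicate_lines;
# other fields and defaults are not shown in this diff.
config = DRYConfig(min_duplicate_lines=4)
analyzer = FileAnalyzer(config)

path = Path("example.py")
blocks = analyzer.analyze(path, path.read_text(), "python", config)
for block in blocks:
    print(block.start_line, block.end_line, block.hash_value)
```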
src/linters/dry/linter.py CHANGED
@@ -37,7 +37,7 @@ from .storage_initializer import StorageInitializer
 from .violation_generator import ViolationGenerator
 
 if TYPE_CHECKING:
-    from .cache import CodeBlock, DRYCache
+    from .cache import CodeBlock
 
 
 @dataclass
@@ -132,24 +132,17 @@ class DRYRule(BaseLintRule):
             return  # Should never happen after initialization
 
         file_path = Path(context.file_path)
-        cache = self._get_cache()
-        blocks = self._file_analyzer.analyze_or_load(
-            file_path, context.file_content, context.language, config, cache
+        blocks = self._file_analyzer.analyze(
+            file_path, context.file_content, context.language, config
         )
 
         if blocks:
             self._store_blocks(file_path, blocks)
 
-    def _get_cache(self) -> DRYCache | None:
-        """Get cache from storage if available."""
-        if not self._storage:
-            return None
-        return self._storage._cache  # pylint: disable=protected-access
-
     def _store_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
-        """Store blocks in memory if storage available."""
+        """Store blocks in SQLite if storage available."""
         if self._storage:
-            self._storage.add_blocks_to_memory(file_path, blocks)
+            self._storage.add_blocks(file_path, blocks)
 
     def finalize(self) -> list[Violation]:
         """Generate violations after all files processed.
src/linters/dry/python_analyzer.py CHANGED
@@ -38,6 +38,10 @@ from .block_filter import BlockFilterRegistry, create_default_registry
 from .cache import CodeBlock
 from .config import DRYConfig
 
+# AST context checking constants
+AST_LOOKBACK_LINES = 10
+AST_LOOKFORWARD_LINES = 5
+
 # Type alias for AST nodes that have line number attributes
 # All stmt and expr nodes have lineno and end_lineno after parsing
 ASTWithLineNumbers = ast.stmt | ast.expr
@@ -58,8 +62,15 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
         """
         super().__init__()
         self._filter_registry = filter_registry or create_default_registry()
-
-    def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
+        # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
+        self._cached_ast: ast.Module | None = None
+        self._cached_content: str | None = None
+        # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
+        self._line_to_nodes: dict[int, list[ast.AST]] | None = None
+
+    def analyze(  # thailint: ignore[nesting.excessive-depth]
+        self, file_path: Path, content: str, config: DRYConfig
+    ) -> list[CodeBlock]:
         """Analyze Python file for duplicate code blocks, excluding docstrings.
 
         Args:
@@ -70,37 +81,73 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
         Returns:
             List of CodeBlock instances with hash values
         """
-        # Get docstring line ranges
-        docstring_ranges = self._get_docstring_ranges_from_content(content)
+        # Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
+        self._cached_ast = self._parse_content_safe(content)
+        self._cached_content = content
+
+        # Performance optimization: Build line-to-node index for O(1) lookups
+        self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
+
+        try:
+            # Get docstring line ranges
+            docstring_ranges = self._get_docstring_ranges_from_content(content)
+
+            # Tokenize with line number tracking
+            lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
 
-        # Tokenize with line number tracking
-        lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
+            # Generate rolling hash windows
+            windows = self._rolling_hash_with_tracking(
+                lines_with_numbers, config.min_duplicate_lines
+            )
 
-        # Generate rolling hash windows
-        windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
+            return self._filter_valid_blocks(windows, file_path, content)
+        finally:
+            # Clear cache after analysis to avoid memory leaks
+            self._cached_ast = None
+            self._cached_content = None
+            self._line_to_nodes = None
 
+    def _filter_valid_blocks(
+        self,
+        windows: list[tuple[int, int, int, str]],
+        file_path: Path,
+        content: str,
+    ) -> list[CodeBlock]:
+        """Filter hash windows and create valid CodeBlock instances."""
         blocks = []
         for hash_val, start_line, end_line, snippet in windows:
-            # Skip blocks that are single logical statements
-            # Check the original source code, not the normalized snippet
-            if self._is_single_statement_in_source(content, start_line, end_line):
-                continue
-
-            block = CodeBlock(
-                file_path=file_path,
-                start_line=start_line,
-                end_line=end_line,
-                snippet=snippet,
-                hash_value=hash_val,
+            block = self._create_block_if_valid(
+                file_path, content, hash_val, start_line, end_line, snippet
             )
+            if block:
+                blocks.append(block)
+        return blocks
 
-            # Apply extensible filters (keyword arguments, imports, etc.)
-            if self._filter_registry.should_filter_block(block, content):
-                continue
+    def _create_block_if_valid(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+        self,
+        file_path: Path,
+        content: str,
+        hash_val: int,
+        start_line: int,
+        end_line: int,
+        snippet: str,
+    ) -> CodeBlock | None:
+        """Create CodeBlock if it passes all validation checks."""
+        if self._is_single_statement_in_source(content, start_line, end_line):
+            return None
 
-            blocks.append(block)
+        block = CodeBlock(
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            snippet=snippet,
+            hash_value=hash_val,
+        )
 
-        return blocks
+        if self._filter_registry.should_filter_block(block, content):
+            return None
+
+        return block
 
     def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
         """Extract line numbers that are part of docstrings.
@@ -168,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
         List of (original_line_number, normalized_code) tuples
         """
         lines_with_numbers = []
+        in_multiline_import = False
 
         for line_num, line in enumerate(content.split("\n"), start=1):
-            # Skip docstring lines
             if line_num in docstring_lines:
                 continue
 
-            # Use hasher's existing tokenization logic
-            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
-            line = " ".join(line.split())
-
+            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
             if not line:
                 continue
 
-            if self._hasher._is_import_statement(line):  # pylint: disable=protected-access
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines_with_numbers.append((line_num, line))
@@ -221,10 +269,20 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
         return hashes
 
     def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
-        """Check if a line range in the original source is a single logical statement."""
-        tree = self._parse_content_safe(content)
-        if tree is None:
-            return False
+        """Check if a line range in the original source is a single logical statement.
+
+        Performance optimization: Uses cached AST if available (set by analyze() method)
+        to avoid re-parsing the entire file for each hash window check.
+        """
+        # Use cached AST if available and content matches
+        tree: ast.Module | None
+        if self._cached_ast is not None and content == self._cached_content:
+            tree = self._cached_ast
+        else:
+            # Fallback: parse content (used by tests or standalone calls)
+            tree = self._parse_content_safe(content)
+            if tree is None:
+                return False
 
         return self._check_overlapping_nodes(tree, start_line, end_line)
 
@@ -236,13 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
         except SyntaxError:
             return None
 
+    @staticmethod
+    def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
+        """Build an index mapping each line number to overlapping AST nodes.
+
+        Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
+        For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
+        to just ~3,365 relevant node checks (99.9% reduction).
+
+        Args:
+            tree: Parsed AST tree (None if parsing failed)
+
+        Returns:
+            Dictionary mapping line numbers to list of AST nodes overlapping that line,
+            or None if tree is None
+        """
+        if tree is None:
+            return None
+
+        line_to_nodes: dict[int, list[ast.AST]] = {}
+        for node in ast.walk(tree):
+            if PythonDuplicateAnalyzer._node_has_line_info(node):
+                PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
+
+        return line_to_nodes
+
+    @staticmethod
+    def _node_has_line_info(node: ast.AST) -> bool:
+        """Check if node has valid line number information."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        return node.lineno is not None and node.end_lineno is not None
+
+    @staticmethod
+    def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
+        """Add node to all lines it overlaps in the index."""
+        for line_num in range(node.lineno, node.end_lineno + 1):  # type: ignore[attr-defined]
+            if line_num not in line_to_nodes:
+                line_to_nodes[line_num] = []
+            line_to_nodes[line_num].append(node)
+
     def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
-        """Check if any AST node overlaps and matches single-statement pattern."""
+        """Check if any AST node overlaps and matches single-statement pattern.
+
+        Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
+        """
+        if self._line_to_nodes is not None:
+            return self._check_nodes_via_index(start_line, end_line)
+        return self._check_nodes_via_walk(tree, start_line, end_line)
+
+    def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
+        """Check nodes using line-to-node index for O(1) lookups."""
+        candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
+        return self._any_node_matches_pattern(candidates, start_line, end_line)
+
+    def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
+        """Collect unique nodes that overlap with the line range from index."""
+        candidate_nodes: set[ast.AST] = set()
+        for line_num in range(start_line, end_line + 1):
+            if self._line_to_nodes and line_num in self._line_to_nodes:
+                candidate_nodes.update(self._line_to_nodes[line_num])
+        return candidate_nodes
+
+    def _any_node_matches_pattern(
+        self, nodes: set[ast.AST], start_line: int, end_line: int
+    ) -> bool:
+        """Check if any node matches single-statement pattern."""
+        for node in nodes:
+            if self._is_single_statement_pattern(node, start_line, end_line):
+                return True
+        return False
+
+    def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
+        """Check nodes using ast.walk() fallback for tests or standalone calls."""
         for node in ast.walk(tree):
-            if self._node_overlaps_and_matches(node, start_line, end_line):
+            if self._node_matches_via_walk(node, start_line, end_line):
                 return True
         return False
 
+    def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if a single node overlaps and matches pattern."""
+        if not self._node_overlaps_range(node, start_line, end_line):
+            return False
+        return self._is_single_statement_pattern(node, start_line, end_line)
+
+    @staticmethod
+    def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
+        """Check if node overlaps with the given line range."""
+        if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
+            return False
+        node_end = node.end_lineno
+        node_start = node.lineno
+        return not (node_end < start_line or node_start > end_line)
+
     def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
         """Check if node overlaps with range and matches single-statement pattern."""
         if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
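The index trades one up-front `ast.walk()` pass for constant-time lookups per hash window. A self-contained demonstration of the same technique outside the class:

```python
import ast

# Walk the tree once, recording every node against each line it spans;
# afterwards "which nodes overlap line N?" is a dictionary lookup.
source = """\
def f(x):
    y = x + 1
    return y
"""
tree = ast.parse(source)
line_to_nodes: dict[int, list[ast.AST]] = {}
for node in ast.walk(tree):
    start = getattr(node, "lineno", None)
    end = getattr(node, "end_lineno", None)
    if start is None or end is None:
        continue  # e.g. Module has no line info
    for line in range(start, end + 1):
        line_to_nodes.setdefault(line, []).append(node)

# Nodes overlapping line 2 (the assignment), without re-walking the tree:
print([type(n).__name__ for n in line_to_nodes.get(2, [])])
```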
@@ -514,4 +658,11 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.violat
                 return True
             return False
 
-        return self._check_ast_context(lines, start_line, end_line, 10, 5, is_within_class_body)
+        return self._check_ast_context(
+            lines,
+            start_line,
+            end_line,
+            AST_LOOKBACK_LINES,
+            AST_LOOKFORWARD_LINES,
+            is_within_class_body,
+        )
src/linters/dry/storage_initializer.py CHANGED
@@ -1,23 +1,21 @@
 """
 Purpose: Storage initialization for DRY linter
 
-Scope: Initializes DuplicateStorage with cache or in-memory fallback
+Scope: Initializes DuplicateStorage with SQLite storage
 
-Overview: Handles storage initialization based on DRY configuration. Creates SQLite cache when
-cache_enabled is true, or triggers in-memory fallback when false (Decision 6). Separates
-initialization logic from main linter rule to maintain SRP compliance.
+Overview: Handles storage initialization based on DRY configuration. Creates SQLite storage in
+either memory or tempfile mode based on config.storage_mode. Separates initialization logic
+from main linter rule to maintain SRP compliance.
 
-Dependencies: BaseLintContext, DRYConfig, DRYCache, DuplicateStorage, Path
+Dependencies: BaseLintContext, DRYConfig, DRYCache, DuplicateStorage
 
 Exports: StorageInitializer class
 
 Interfaces: StorageInitializer.initialize(context, config) -> DuplicateStorage
 
-Implementation: Creates cache if enabled, delegates to DuplicateStorage for storage management
+Implementation: Creates DRYCache with storage_mode, delegates to DuplicateStorage for management
 """
 
-from pathlib import Path
-
 from src.core.base import BaseLintContext
 
 from .cache import DRYCache
@@ -36,16 +34,9 @@ class StorageInitializer:
             config: DRY configuration
 
         Returns:
-            DuplicateStorage instance
+            DuplicateStorage instance with SQLite storage
         """
-        cache = None
-        if config.cache_enabled:
-            # Use SQLite cache
-            metadata = getattr(context, "metadata", {})
-            project_root = metadata.get("_project_root", Path.cwd())
-            cache_path = project_root / config.cache_path
-            cache_path.parent.mkdir(parents=True, exist_ok=True)
-            cache = DRYCache(cache_path)
-        # else: cache = None triggers in-memory fallback in DuplicateStorage
+        # Create SQLite storage (in-memory or tempfile based on config)
+        cache = DRYCache(storage_mode=config.storage_mode)
 
         return DuplicateStorage(cache)
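The initializer no longer touches the project root or creates cache directories; `DRYCache` now receives only a `storage_mode`. The diff does not show `DRYCache` itself, so the following is only a hedged sketch of what a memory-vs-tempfile switch over SQLite could look like, not thailint's actual implementation:

```python
import sqlite3
import tempfile

def open_storage(storage_mode: str) -> sqlite3.Connection:
    """Illustrative mapping of a storage_mode flag to a SQLite target."""
    if storage_mode == "memory":
        return sqlite3.connect(":memory:")
    if storage_mode == "tempfile":
        # delete=False keeps the backing file alive for the linting run;
        # a real implementation would clean it up afterwards.
        tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
        return sqlite3.connect(tmp.name)
    raise ValueError(f"unknown storage_mode: {storage_mode}")
```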
src/linters/dry/token_hasher.py CHANGED
@@ -33,26 +33,80 @@ class TokenHasher:
         List of normalized code lines (non-empty, comments removed, imports filtered)
         """
         lines = []
+        in_multiline_import = False
 
         for line in code.split("\n"):
-            # Remove comments (language-specific logic can be added)
-            line = self._strip_comments(line)
-
-            # Normalize whitespace (collapse to single space)
-            line = " ".join(line.split())
-
-            # Skip empty lines
+            line = self._normalize_line(line)
             if not line:
                 continue
 
-            # Skip import statements (common false positive)
-            if self._is_import_statement(line):
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._should_skip_import_line(
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines.append(line)
 
         return lines
 
+    def _normalize_line(self, line: str) -> str:
+        """Normalize a line by removing comments and excess whitespace.
+
+        Args:
+            line: Raw source code line
+
+        Returns:
+            Normalized line (empty string if line has no content)
+        """
+        line = self._strip_comments(line)
+        return " ".join(line.split())
+
+    def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
+        """Determine if an import line should be skipped.
+
+        Args:
+            line: Normalized code line
+            in_multiline_import: Whether we're currently inside a multi-line import
+
+        Returns:
+            Tuple of (new_in_multiline_import_state, should_skip_line)
+        """
+        if self._is_multiline_import_start(line):
+            return True, True
+
+        if in_multiline_import:
+            return self._handle_multiline_import_continuation(line)
+
+        if self._is_import_statement(line):
+            return False, True
+
+        return False, False
+
+    def _is_multiline_import_start(self, line: str) -> bool:
+        """Check if line starts a multi-line import statement.
+
+        Args:
+            line: Normalized code line
+
+        Returns:
+            True if line starts a multi-line import (has opening paren but no closing)
+        """
+        return self._is_import_statement(line) and "(" in line and ")" not in line
+
+    def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
+        """Handle a line that's part of a multi-line import.
+
+        Args:
+            line: Normalized code line inside a multi-line import
+
+        Returns:
+            Tuple of (still_in_import, should_skip)
+        """
+        closes_import = ")" in line
+        return not closes_import, True
+
     def _strip_comments(self, line: str) -> str:
         """Remove comments from line (Python # and // style).
 
src/linters/dry/typescript_analyzer.py CHANGED
@@ -186,20 +186,22 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer):  # thailint: ignore[srp.vi
         List of (original_line_number, normalized_code) tuples
         """
         lines_with_numbers = []
+        in_multiline_import = False
 
         for line_num, line in enumerate(content.split("\n"), start=1):
             # Skip JSDoc comment lines
             if line_num in jsdoc_lines:
                 continue
 
-            # Use hasher's existing tokenization logic
-            line = self._hasher._strip_comments(line)  # pylint: disable=protected-access
-            line = " ".join(line.split())
-
+            line = self._hasher._normalize_line(line)  # pylint: disable=protected-access
             if not line:
                 continue
 
-            if self._hasher._is_import_statement(line):  # pylint: disable=protected-access
+            # Update multi-line import state and check if line should be skipped
+            in_multiline_import, should_skip = self._hasher._should_skip_import_line(  # pylint: disable=protected-access
+                line, in_multiline_import
+            )
+            if should_skip:
                 continue
 
             lines_with_numbers.append((line_num, line))
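Both analyzers now delegate to `TokenHasher._should_skip_import_line`, which threads an `in_multiline_import` flag through the tokenization loop so that parenthesized imports are filtered in full. A standalone rendering of that state machine; `should_skip_import_line` here is a free function with an illustrative import test, not the TokenHasher method:

```python
def should_skip_import_line(line: str, in_import: bool) -> tuple[bool, bool]:
    """Return (new_state, skip) for one normalized line."""
    is_import = line.startswith(("import ", "from "))
    if is_import and "(" in line and ")" not in line:
        return True, True             # opens a multi-line import
    if in_import:
        return ")" not in line, True  # stay inside until the paren closes
    return False, is_import           # single-line imports are skipped

state = False
for line in ["from x import (", "    a,", "    b,", ")", "y = a + b"]:
    state, skip = should_skip_import_line(line.strip(), state)
    print(f"{line!r}: skip={skip}")
```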
src/linters/dry/violation_filter.py CHANGED
@@ -18,6 +18,9 @@ Implementation: Iterates through sorted violations, keeps first of each overlapp
 
 from src.core.types import Violation
 
+# Default fallback for line count when parsing fails
+DEFAULT_FALLBACK_LINE_COUNT = 5
+
 
 class ViolationFilter:
     """Filters overlapping violations."""
@@ -88,4 +91,4 @@ class ViolationFilter:
             end = message.index(" lines")
             return int(message[start:end])
         except (ValueError, IndexError):
-            return 5  # Default fallback
+            return DEFAULT_FALLBACK_LINE_COUNT  # Default fallback
src/linters/file_header/__init__.py ADDED
@@ -0,0 +1,24 @@
+"""
+File: src/linters/file_header/__init__.py
+Purpose: File header linter module initialization
+Exports: FileHeaderRule
+Depends: linter.FileHeaderRule
+Implements: Module-level exports for clean API
+Related: linter.py for main rule implementation
+
+Overview:
+    Initializes the file header linter module providing multi-language file header
+    validation with mandatory field checking, atemporal language detection, and configuration
+    support. Main entry point for file header linting functionality.
+
+Usage:
+    from src.linters.file_header import FileHeaderRule
+    rule = FileHeaderRule()
+    violations = rule.check(context)
+
+Notes: Follows standard Python module initialization pattern with __all__ export control
+"""
+
+from .linter import FileHeaderRule
+
+__all__ = ["FileHeaderRule"]