thailint 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/cli.py +646 -36
- src/config.py +6 -2
- src/core/base.py +90 -5
- src/core/config_parser.py +31 -4
- src/linters/dry/block_filter.py +5 -2
- src/linters/dry/cache.py +46 -92
- src/linters/dry/config.py +17 -13
- src/linters/dry/duplicate_storage.py +17 -80
- src/linters/dry/file_analyzer.py +11 -48
- src/linters/dry/linter.py +5 -12
- src/linters/dry/python_analyzer.py +188 -37
- src/linters/dry/storage_initializer.py +9 -18
- src/linters/dry/token_hasher.py +63 -9
- src/linters/dry/typescript_analyzer.py +7 -5
- src/linters/dry/violation_filter.py +4 -1
- src/linters/file_header/__init__.py +24 -0
- src/linters/file_header/atemporal_detector.py +87 -0
- src/linters/file_header/config.py +66 -0
- src/linters/file_header/field_validator.py +69 -0
- src/linters/file_header/linter.py +313 -0
- src/linters/file_header/python_parser.py +86 -0
- src/linters/file_header/violation_builder.py +78 -0
- src/linters/file_placement/linter.py +15 -4
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +82 -0
- src/linters/magic_numbers/context_analyzer.py +247 -0
- src/linters/magic_numbers/linter.py +516 -0
- src/linters/magic_numbers/python_analyzer.py +76 -0
- src/linters/magic_numbers/typescript_analyzer.py +218 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +6 -3
- src/linters/nesting/linter.py +8 -19
- src/linters/nesting/typescript_analyzer.py +1 -0
- src/linters/print_statements/__init__.py +53 -0
- src/linters/print_statements/config.py +83 -0
- src/linters/print_statements/linter.py +430 -0
- src/linters/print_statements/python_analyzer.py +155 -0
- src/linters/print_statements/typescript_analyzer.py +135 -0
- src/linters/print_statements/violation_builder.py +98 -0
- src/linters/srp/__init__.py +3 -3
- src/linters/srp/config.py +12 -6
- src/linters/srp/linter.py +33 -24
- src/orchestrator/core.py +12 -2
- src/templates/thailint_config_template.yaml +158 -0
- src/utils/project_root.py +135 -16
- {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/METADATA +387 -81
- thailint-0.5.0.dist-info/RECORD +96 -0
- {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/WHEEL +1 -1
- thailint-0.2.0.dist-info/RECORD +0 -75
- {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info}/entry_points.txt +0 -0
- {thailint-0.2.0.dist-info → thailint-0.5.0.dist-info/licenses}/LICENSE +0 -0
src/linters/dry/file_analyzer.py
CHANGED
|
@@ -1,45 +1,32 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Purpose: File analysis orchestration for duplicate detection
|
|
3
3
|
|
|
4
|
-
Scope: Coordinates language-specific analyzers
|
|
4
|
+
Scope: Coordinates language-specific analyzers
|
|
5
5
|
|
|
6
|
-
Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript)
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
SRP compliance.
|
|
6
|
+
Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript).
|
|
7
|
+
Analyzes files fresh every run - no cache loading. Separates file analysis orchestration from
|
|
8
|
+
main linter rule logic to maintain SRP compliance.
|
|
10
9
|
|
|
11
|
-
Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer,
|
|
10
|
+
Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYConfig, CodeBlock
|
|
12
11
|
|
|
13
12
|
Exports: FileAnalyzer class
|
|
14
13
|
|
|
15
|
-
Interfaces: FileAnalyzer.
|
|
14
|
+
Interfaces: FileAnalyzer.analyze(file_path, content, language, config)
|
|
16
15
|
|
|
17
|
-
Implementation: Delegates to language-specific analyzers,
|
|
16
|
+
Implementation: Delegates to language-specific analyzers, always performs fresh analysis
|
|
18
17
|
"""
|
|
19
18
|
|
|
20
|
-
from dataclasses import dataclass
|
|
21
19
|
from pathlib import Path
|
|
22
20
|
|
|
23
21
|
from .block_filter import BlockFilterRegistry, create_default_registry
|
|
24
|
-
from .cache import CodeBlock
|
|
22
|
+
from .cache import CodeBlock
|
|
25
23
|
from .config import DRYConfig
|
|
26
24
|
from .python_analyzer import PythonDuplicateAnalyzer
|
|
27
25
|
from .typescript_analyzer import TypeScriptDuplicateAnalyzer
|
|
28
26
|
|
|
29
27
|
|
|
30
|
-
@dataclass
|
|
31
|
-
class FileAnalysisContext:
|
|
32
|
-
"""Context for file analysis."""
|
|
33
|
-
|
|
34
|
-
file_path: Path
|
|
35
|
-
content: str
|
|
36
|
-
language: str
|
|
37
|
-
config: DRYConfig
|
|
38
|
-
cache: DRYCache | None
|
|
39
|
-
|
|
40
|
-
|
|
41
28
|
class FileAnalyzer:
|
|
42
|
-
"""Orchestrates file analysis
|
|
29
|
+
"""Orchestrates file analysis for duplicate detection."""
|
|
43
30
|
|
|
44
31
|
def __init__(self, config: DRYConfig | None = None) -> None:
|
|
45
32
|
"""Initialize with language-specific analyzers.
|
|
@@ -77,49 +64,25 @@ class FileAnalyzer:
|
|
|
77
64
|
|
|
78
65
|
return registry
|
|
79
66
|
|
|
80
|
-
def
|
|
67
|
+
def analyze(
|
|
81
68
|
self,
|
|
82
69
|
file_path: Path,
|
|
83
70
|
content: str,
|
|
84
71
|
language: str,
|
|
85
72
|
config: DRYConfig,
|
|
86
|
-
cache: DRYCache | None = None,
|
|
87
73
|
) -> list[CodeBlock]:
|
|
88
|
-
"""Analyze file
|
|
74
|
+
"""Analyze file for duplicate code blocks.
|
|
89
75
|
|
|
90
76
|
Args:
|
|
91
77
|
file_path: Path to file
|
|
92
78
|
content: File content
|
|
93
79
|
language: File language
|
|
94
80
|
config: DRY configuration
|
|
95
|
-
cache: Optional cache instance
|
|
96
81
|
|
|
97
82
|
Returns:
|
|
98
83
|
List of CodeBlock instances
|
|
99
84
|
"""
|
|
100
|
-
# Check if file is fresh in cache
|
|
101
|
-
if cache:
|
|
102
|
-
mtime = file_path.stat().st_mtime
|
|
103
|
-
if cache.is_fresh(file_path, mtime):
|
|
104
|
-
return cache.load(file_path)
|
|
105
|
-
|
|
106
85
|
# Analyze file based on language
|
|
107
|
-
return self._analyze_file(file_path, content, language, config)
|
|
108
|
-
|
|
109
|
-
def _analyze_file(
|
|
110
|
-
self, file_path: Path, content: str, language: str, config: DRYConfig
|
|
111
|
-
) -> list[CodeBlock]:
|
|
112
|
-
"""Analyze file based on language.
|
|
113
|
-
|
|
114
|
-
Args:
|
|
115
|
-
file_path: Path to file
|
|
116
|
-
content: File content
|
|
117
|
-
language: File language
|
|
118
|
-
config: DRY configuration
|
|
119
|
-
|
|
120
|
-
Returns:
|
|
121
|
-
List of CodeBlock instances
|
|
122
|
-
"""
|
|
123
86
|
if language == "python":
|
|
124
87
|
return self._python_analyzer.analyze(file_path, content, config)
|
|
125
88
|
if language in ("typescript", "javascript"):
|
src/linters/dry/linter.py
CHANGED
|
@@ -37,7 +37,7 @@ from .storage_initializer import StorageInitializer
|
|
|
37
37
|
from .violation_generator import ViolationGenerator
|
|
38
38
|
|
|
39
39
|
if TYPE_CHECKING:
|
|
40
|
-
from .cache import CodeBlock
|
|
40
|
+
from .cache import CodeBlock
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
@dataclass
|
|
@@ -132,24 +132,17 @@ class DRYRule(BaseLintRule):
|
|
|
132
132
|
return # Should never happen after initialization
|
|
133
133
|
|
|
134
134
|
file_path = Path(context.file_path)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
file_path, context.file_content, context.language, config, cache
|
|
135
|
+
blocks = self._file_analyzer.analyze(
|
|
136
|
+
file_path, context.file_content, context.language, config
|
|
138
137
|
)
|
|
139
138
|
|
|
140
139
|
if blocks:
|
|
141
140
|
self._store_blocks(file_path, blocks)
|
|
142
141
|
|
|
143
|
-
def _get_cache(self) -> DRYCache | None:
|
|
144
|
-
"""Get cache from storage if available."""
|
|
145
|
-
if not self._storage:
|
|
146
|
-
return None
|
|
147
|
-
return self._storage._cache # pylint: disable=protected-access
|
|
148
|
-
|
|
149
142
|
def _store_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
150
|
-
"""Store blocks in
|
|
143
|
+
"""Store blocks in SQLite if storage available."""
|
|
151
144
|
if self._storage:
|
|
152
|
-
self._storage.
|
|
145
|
+
self._storage.add_blocks(file_path, blocks)
|
|
153
146
|
|
|
154
147
|
def finalize(self) -> list[Violation]:
|
|
155
148
|
"""Generate violations after all files processed.
|
|
@@ -38,6 +38,10 @@ from .block_filter import BlockFilterRegistry, create_default_registry
|
|
|
38
38
|
from .cache import CodeBlock
|
|
39
39
|
from .config import DRYConfig
|
|
40
40
|
|
|
41
|
+
# AST context checking constants
|
|
42
|
+
AST_LOOKBACK_LINES = 10
|
|
43
|
+
AST_LOOKFORWARD_LINES = 5
|
|
44
|
+
|
|
41
45
|
# Type alias for AST nodes that have line number attributes
|
|
42
46
|
# All stmt and expr nodes have lineno and end_lineno after parsing
|
|
43
47
|
ASTWithLineNumbers = ast.stmt | ast.expr
|
|
@@ -58,8 +62,15 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
58
62
|
"""
|
|
59
63
|
super().__init__()
|
|
60
64
|
self._filter_registry = filter_registry or create_default_registry()
|
|
61
|
-
|
|
62
|
-
|
|
65
|
+
# Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
|
|
66
|
+
self._cached_ast: ast.Module | None = None
|
|
67
|
+
self._cached_content: str | None = None
|
|
68
|
+
# Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
|
|
69
|
+
self._line_to_nodes: dict[int, list[ast.AST]] | None = None
|
|
70
|
+
|
|
71
|
+
def analyze( # thailint: ignore[nesting.excessive-depth]
|
|
72
|
+
self, file_path: Path, content: str, config: DRYConfig
|
|
73
|
+
) -> list[CodeBlock]:
|
|
63
74
|
"""Analyze Python file for duplicate code blocks, excluding docstrings.
|
|
64
75
|
|
|
65
76
|
Args:
|
|
@@ -70,37 +81,73 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
70
81
|
Returns:
|
|
71
82
|
List of CodeBlock instances with hash values
|
|
72
83
|
"""
|
|
73
|
-
#
|
|
74
|
-
|
|
84
|
+
# Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
|
|
85
|
+
self._cached_ast = self._parse_content_safe(content)
|
|
86
|
+
self._cached_content = content
|
|
87
|
+
|
|
88
|
+
# Performance optimization: Build line-to-node index for O(1) lookups
|
|
89
|
+
self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
|
|
90
|
+
|
|
91
|
+
try:
|
|
92
|
+
# Get docstring line ranges
|
|
93
|
+
docstring_ranges = self._get_docstring_ranges_from_content(content)
|
|
94
|
+
|
|
95
|
+
# Tokenize with line number tracking
|
|
96
|
+
lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
|
|
75
97
|
|
|
76
|
-
|
|
77
|
-
|
|
98
|
+
# Generate rolling hash windows
|
|
99
|
+
windows = self._rolling_hash_with_tracking(
|
|
100
|
+
lines_with_numbers, config.min_duplicate_lines
|
|
101
|
+
)
|
|
78
102
|
|
|
79
|
-
|
|
80
|
-
|
|
103
|
+
return self._filter_valid_blocks(windows, file_path, content)
|
|
104
|
+
finally:
|
|
105
|
+
# Clear cache after analysis to avoid memory leaks
|
|
106
|
+
self._cached_ast = None
|
|
107
|
+
self._cached_content = None
|
|
108
|
+
self._line_to_nodes = None
|
|
81
109
|
|
|
110
|
+
def _filter_valid_blocks(
|
|
111
|
+
self,
|
|
112
|
+
windows: list[tuple[int, int, int, str]],
|
|
113
|
+
file_path: Path,
|
|
114
|
+
content: str,
|
|
115
|
+
) -> list[CodeBlock]:
|
|
116
|
+
"""Filter hash windows and create valid CodeBlock instances."""
|
|
82
117
|
blocks = []
|
|
83
118
|
for hash_val, start_line, end_line, snippet in windows:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if self._is_single_statement_in_source(content, start_line, end_line):
|
|
87
|
-
continue
|
|
88
|
-
|
|
89
|
-
block = CodeBlock(
|
|
90
|
-
file_path=file_path,
|
|
91
|
-
start_line=start_line,
|
|
92
|
-
end_line=end_line,
|
|
93
|
-
snippet=snippet,
|
|
94
|
-
hash_value=hash_val,
|
|
119
|
+
block = self._create_block_if_valid(
|
|
120
|
+
file_path, content, hash_val, start_line, end_line, snippet
|
|
95
121
|
)
|
|
122
|
+
if block:
|
|
123
|
+
blocks.append(block)
|
|
124
|
+
return blocks
|
|
96
125
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
126
|
+
def _create_block_if_valid( # pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
127
|
+
self,
|
|
128
|
+
file_path: Path,
|
|
129
|
+
content: str,
|
|
130
|
+
hash_val: int,
|
|
131
|
+
start_line: int,
|
|
132
|
+
end_line: int,
|
|
133
|
+
snippet: str,
|
|
134
|
+
) -> CodeBlock | None:
|
|
135
|
+
"""Create CodeBlock if it passes all validation checks."""
|
|
136
|
+
if self._is_single_statement_in_source(content, start_line, end_line):
|
|
137
|
+
return None
|
|
100
138
|
|
|
101
|
-
|
|
139
|
+
block = CodeBlock(
|
|
140
|
+
file_path=file_path,
|
|
141
|
+
start_line=start_line,
|
|
142
|
+
end_line=end_line,
|
|
143
|
+
snippet=snippet,
|
|
144
|
+
hash_value=hash_val,
|
|
145
|
+
)
|
|
102
146
|
|
|
103
|
-
|
|
147
|
+
if self._filter_registry.should_filter_block(block, content):
|
|
148
|
+
return None
|
|
149
|
+
|
|
150
|
+
return block
|
|
104
151
|
|
|
105
152
|
def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
|
|
106
153
|
"""Extract line numbers that are part of docstrings.
|
|
@@ -168,20 +215,21 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
168
215
|
List of (original_line_number, normalized_code) tuples
|
|
169
216
|
"""
|
|
170
217
|
lines_with_numbers = []
|
|
218
|
+
in_multiline_import = False
|
|
171
219
|
|
|
172
220
|
for line_num, line in enumerate(content.split("\n"), start=1):
|
|
173
|
-
# Skip docstring lines
|
|
174
221
|
if line_num in docstring_lines:
|
|
175
222
|
continue
|
|
176
223
|
|
|
177
|
-
|
|
178
|
-
line = self._hasher._strip_comments(line) # pylint: disable=protected-access
|
|
179
|
-
line = " ".join(line.split())
|
|
180
|
-
|
|
224
|
+
line = self._hasher._normalize_line(line) # pylint: disable=protected-access
|
|
181
225
|
if not line:
|
|
182
226
|
continue
|
|
183
227
|
|
|
184
|
-
if
|
|
228
|
+
# Update multi-line import state and check if line should be skipped
|
|
229
|
+
in_multiline_import, should_skip = self._hasher._should_skip_import_line( # pylint: disable=protected-access
|
|
230
|
+
line, in_multiline_import
|
|
231
|
+
)
|
|
232
|
+
if should_skip:
|
|
185
233
|
continue
|
|
186
234
|
|
|
187
235
|
lines_with_numbers.append((line_num, line))
|
|
@@ -221,10 +269,20 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
221
269
|
return hashes
|
|
222
270
|
|
|
223
271
|
def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
|
|
224
|
-
"""Check if a line range in the original source is a single logical statement.
|
|
225
|
-
|
|
226
|
-
if
|
|
227
|
-
|
|
272
|
+
"""Check if a line range in the original source is a single logical statement.
|
|
273
|
+
|
|
274
|
+
Performance optimization: Uses cached AST if available (set by analyze() method)
|
|
275
|
+
to avoid re-parsing the entire file for each hash window check.
|
|
276
|
+
"""
|
|
277
|
+
# Use cached AST if available and content matches
|
|
278
|
+
tree: ast.Module | None
|
|
279
|
+
if self._cached_ast is not None and content == self._cached_content:
|
|
280
|
+
tree = self._cached_ast
|
|
281
|
+
else:
|
|
282
|
+
# Fallback: parse content (used by tests or standalone calls)
|
|
283
|
+
tree = self._parse_content_safe(content)
|
|
284
|
+
if tree is None:
|
|
285
|
+
return False
|
|
228
286
|
|
|
229
287
|
return self._check_overlapping_nodes(tree, start_line, end_line)
|
|
230
288
|
|
|
@@ -236,13 +294,99 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
236
294
|
except SyntaxError:
|
|
237
295
|
return None
|
|
238
296
|
|
|
297
|
+
@staticmethod
|
|
298
|
+
def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
|
|
299
|
+
"""Build an index mapping each line number to overlapping AST nodes.
|
|
300
|
+
|
|
301
|
+
Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
|
|
302
|
+
For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
|
|
303
|
+
to just ~3,365 relevant node checks (99.9% reduction).
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
tree: Parsed AST tree (None if parsing failed)
|
|
307
|
+
|
|
308
|
+
Returns:
|
|
309
|
+
Dictionary mapping line numbers to list of AST nodes overlapping that line,
|
|
310
|
+
or None if tree is None
|
|
311
|
+
"""
|
|
312
|
+
if tree is None:
|
|
313
|
+
return None
|
|
314
|
+
|
|
315
|
+
line_to_nodes: dict[int, list[ast.AST]] = {}
|
|
316
|
+
for node in ast.walk(tree):
|
|
317
|
+
if PythonDuplicateAnalyzer._node_has_line_info(node):
|
|
318
|
+
PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
|
|
319
|
+
|
|
320
|
+
return line_to_nodes
|
|
321
|
+
|
|
322
|
+
@staticmethod
|
|
323
|
+
def _node_has_line_info(node: ast.AST) -> bool:
|
|
324
|
+
"""Check if node has valid line number information."""
|
|
325
|
+
if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
|
|
326
|
+
return False
|
|
327
|
+
return node.lineno is not None and node.end_lineno is not None
|
|
328
|
+
|
|
329
|
+
@staticmethod
|
|
330
|
+
def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
|
|
331
|
+
"""Add node to all lines it overlaps in the index."""
|
|
332
|
+
for line_num in range(node.lineno, node.end_lineno + 1): # type: ignore[attr-defined]
|
|
333
|
+
if line_num not in line_to_nodes:
|
|
334
|
+
line_to_nodes[line_num] = []
|
|
335
|
+
line_to_nodes[line_num].append(node)
|
|
336
|
+
|
|
239
337
|
def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
|
|
240
|
-
"""Check if any AST node overlaps and matches single-statement pattern.
|
|
338
|
+
"""Check if any AST node overlaps and matches single-statement pattern.
|
|
339
|
+
|
|
340
|
+
Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
|
|
341
|
+
"""
|
|
342
|
+
if self._line_to_nodes is not None:
|
|
343
|
+
return self._check_nodes_via_index(start_line, end_line)
|
|
344
|
+
return self._check_nodes_via_walk(tree, start_line, end_line)
|
|
345
|
+
|
|
346
|
+
def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
|
|
347
|
+
"""Check nodes using line-to-node index for O(1) lookups."""
|
|
348
|
+
candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
|
|
349
|
+
return self._any_node_matches_pattern(candidates, start_line, end_line)
|
|
350
|
+
|
|
351
|
+
def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
|
|
352
|
+
"""Collect unique nodes that overlap with the line range from index."""
|
|
353
|
+
candidate_nodes: set[ast.AST] = set()
|
|
354
|
+
for line_num in range(start_line, end_line + 1):
|
|
355
|
+
if self._line_to_nodes and line_num in self._line_to_nodes:
|
|
356
|
+
candidate_nodes.update(self._line_to_nodes[line_num])
|
|
357
|
+
return candidate_nodes
|
|
358
|
+
|
|
359
|
+
def _any_node_matches_pattern(
|
|
360
|
+
self, nodes: set[ast.AST], start_line: int, end_line: int
|
|
361
|
+
) -> bool:
|
|
362
|
+
"""Check if any node matches single-statement pattern."""
|
|
363
|
+
for node in nodes:
|
|
364
|
+
if self._is_single_statement_pattern(node, start_line, end_line):
|
|
365
|
+
return True
|
|
366
|
+
return False
|
|
367
|
+
|
|
368
|
+
def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
|
|
369
|
+
"""Check nodes using ast.walk() fallback for tests or standalone calls."""
|
|
241
370
|
for node in ast.walk(tree):
|
|
242
|
-
if self.
|
|
371
|
+
if self._node_matches_via_walk(node, start_line, end_line):
|
|
243
372
|
return True
|
|
244
373
|
return False
|
|
245
374
|
|
|
375
|
+
def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
|
|
376
|
+
"""Check if a single node overlaps and matches pattern."""
|
|
377
|
+
if not self._node_overlaps_range(node, start_line, end_line):
|
|
378
|
+
return False
|
|
379
|
+
return self._is_single_statement_pattern(node, start_line, end_line)
|
|
380
|
+
|
|
381
|
+
@staticmethod
|
|
382
|
+
def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
|
|
383
|
+
"""Check if node overlaps with the given line range."""
|
|
384
|
+
if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
|
|
385
|
+
return False
|
|
386
|
+
node_end = node.end_lineno
|
|
387
|
+
node_start = node.lineno
|
|
388
|
+
return not (node_end < start_line or node_start > end_line)
|
|
389
|
+
|
|
246
390
|
def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
|
|
247
391
|
"""Check if node overlaps with range and matches single-statement pattern."""
|
|
248
392
|
if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
|
|
@@ -514,4 +658,11 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
514
658
|
return True
|
|
515
659
|
return False
|
|
516
660
|
|
|
517
|
-
return self._check_ast_context(
|
|
661
|
+
return self._check_ast_context(
|
|
662
|
+
lines,
|
|
663
|
+
start_line,
|
|
664
|
+
end_line,
|
|
665
|
+
AST_LOOKBACK_LINES,
|
|
666
|
+
AST_LOOKFORWARD_LINES,
|
|
667
|
+
is_within_class_body,
|
|
668
|
+
)
|
|
@@ -1,23 +1,21 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Purpose: Storage initialization for DRY linter
|
|
3
3
|
|
|
4
|
-
Scope: Initializes DuplicateStorage with
|
|
4
|
+
Scope: Initializes DuplicateStorage with SQLite storage
|
|
5
5
|
|
|
6
|
-
Overview: Handles storage initialization based on DRY configuration. Creates SQLite
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
Overview: Handles storage initialization based on DRY configuration. Creates SQLite storage in
|
|
7
|
+
either memory or tempfile mode based on config.storage_mode. Separates initialization logic
|
|
8
|
+
from main linter rule to maintain SRP compliance.
|
|
9
9
|
|
|
10
|
-
Dependencies: BaseLintContext, DRYConfig, DRYCache, DuplicateStorage
|
|
10
|
+
Dependencies: BaseLintContext, DRYConfig, DRYCache, DuplicateStorage
|
|
11
11
|
|
|
12
12
|
Exports: StorageInitializer class
|
|
13
13
|
|
|
14
14
|
Interfaces: StorageInitializer.initialize(context, config) -> DuplicateStorage
|
|
15
15
|
|
|
16
|
-
Implementation: Creates
|
|
16
|
+
Implementation: Creates DRYCache with storage_mode, delegates to DuplicateStorage for management
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
from pathlib import Path
|
|
20
|
-
|
|
21
19
|
from src.core.base import BaseLintContext
|
|
22
20
|
|
|
23
21
|
from .cache import DRYCache
|
|
@@ -36,16 +34,9 @@ class StorageInitializer:
|
|
|
36
34
|
config: DRY configuration
|
|
37
35
|
|
|
38
36
|
Returns:
|
|
39
|
-
DuplicateStorage instance
|
|
37
|
+
DuplicateStorage instance with SQLite storage
|
|
40
38
|
"""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Use SQLite cache
|
|
44
|
-
metadata = getattr(context, "metadata", {})
|
|
45
|
-
project_root = metadata.get("_project_root", Path.cwd())
|
|
46
|
-
cache_path = project_root / config.cache_path
|
|
47
|
-
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
|
48
|
-
cache = DRYCache(cache_path)
|
|
49
|
-
# else: cache = None triggers in-memory fallback in DuplicateStorage
|
|
39
|
+
# Create SQLite storage (in-memory or tempfile based on config)
|
|
40
|
+
cache = DRYCache(storage_mode=config.storage_mode)
|
|
50
41
|
|
|
51
42
|
return DuplicateStorage(cache)
|
src/linters/dry/token_hasher.py
CHANGED
|
@@ -33,26 +33,80 @@ class TokenHasher:
|
|
|
33
33
|
List of normalized code lines (non-empty, comments removed, imports filtered)
|
|
34
34
|
"""
|
|
35
35
|
lines = []
|
|
36
|
+
in_multiline_import = False
|
|
36
37
|
|
|
37
38
|
for line in code.split("\n"):
|
|
38
|
-
|
|
39
|
-
line = self._strip_comments(line)
|
|
40
|
-
|
|
41
|
-
# Normalize whitespace (collapse to single space)
|
|
42
|
-
line = " ".join(line.split())
|
|
43
|
-
|
|
44
|
-
# Skip empty lines
|
|
39
|
+
line = self._normalize_line(line)
|
|
45
40
|
if not line:
|
|
46
41
|
continue
|
|
47
42
|
|
|
48
|
-
#
|
|
49
|
-
|
|
43
|
+
# Update multi-line import state and check if line should be skipped
|
|
44
|
+
in_multiline_import, should_skip = self._should_skip_import_line(
|
|
45
|
+
line, in_multiline_import
|
|
46
|
+
)
|
|
47
|
+
if should_skip:
|
|
50
48
|
continue
|
|
51
49
|
|
|
52
50
|
lines.append(line)
|
|
53
51
|
|
|
54
52
|
return lines
|
|
55
53
|
|
|
54
|
+
def _normalize_line(self, line: str) -> str:
|
|
55
|
+
"""Normalize a line by removing comments and excess whitespace.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
line: Raw source code line
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
Normalized line (empty string if line has no content)
|
|
62
|
+
"""
|
|
63
|
+
line = self._strip_comments(line)
|
|
64
|
+
return " ".join(line.split())
|
|
65
|
+
|
|
66
|
+
def _should_skip_import_line(self, line: str, in_multiline_import: bool) -> tuple[bool, bool]:
|
|
67
|
+
"""Determine if an import line should be skipped.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
line: Normalized code line
|
|
71
|
+
in_multiline_import: Whether we're currently inside a multi-line import
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Tuple of (new_in_multiline_import_state, should_skip_line)
|
|
75
|
+
"""
|
|
76
|
+
if self._is_multiline_import_start(line):
|
|
77
|
+
return True, True
|
|
78
|
+
|
|
79
|
+
if in_multiline_import:
|
|
80
|
+
return self._handle_multiline_import_continuation(line)
|
|
81
|
+
|
|
82
|
+
if self._is_import_statement(line):
|
|
83
|
+
return False, True
|
|
84
|
+
|
|
85
|
+
return False, False
|
|
86
|
+
|
|
87
|
+
def _is_multiline_import_start(self, line: str) -> bool:
|
|
88
|
+
"""Check if line starts a multi-line import statement.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
line: Normalized code line
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
True if line starts a multi-line import (has opening paren but no closing)
|
|
95
|
+
"""
|
|
96
|
+
return self._is_import_statement(line) and "(" in line and ")" not in line
|
|
97
|
+
|
|
98
|
+
def _handle_multiline_import_continuation(self, line: str) -> tuple[bool, bool]:
|
|
99
|
+
"""Handle a line that's part of a multi-line import.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
line: Normalized code line inside a multi-line import
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
Tuple of (still_in_import, should_skip)
|
|
106
|
+
"""
|
|
107
|
+
closes_import = ")" in line
|
|
108
|
+
return not closes_import, True
|
|
109
|
+
|
|
56
110
|
def _strip_comments(self, line: str) -> str:
|
|
57
111
|
"""Remove comments from line (Python # and // style).
|
|
58
112
|
|
|
@@ -186,20 +186,22 @@ class TypeScriptDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.vi
|
|
|
186
186
|
List of (original_line_number, normalized_code) tuples
|
|
187
187
|
"""
|
|
188
188
|
lines_with_numbers = []
|
|
189
|
+
in_multiline_import = False
|
|
189
190
|
|
|
190
191
|
for line_num, line in enumerate(content.split("\n"), start=1):
|
|
191
192
|
# Skip JSDoc comment lines
|
|
192
193
|
if line_num in jsdoc_lines:
|
|
193
194
|
continue
|
|
194
195
|
|
|
195
|
-
|
|
196
|
-
line = self._hasher._strip_comments(line) # pylint: disable=protected-access
|
|
197
|
-
line = " ".join(line.split())
|
|
198
|
-
|
|
196
|
+
line = self._hasher._normalize_line(line) # pylint: disable=protected-access
|
|
199
197
|
if not line:
|
|
200
198
|
continue
|
|
201
199
|
|
|
202
|
-
if
|
|
200
|
+
# Update multi-line import state and check if line should be skipped
|
|
201
|
+
in_multiline_import, should_skip = self._hasher._should_skip_import_line( # pylint: disable=protected-access
|
|
202
|
+
line, in_multiline_import
|
|
203
|
+
)
|
|
204
|
+
if should_skip:
|
|
203
205
|
continue
|
|
204
206
|
|
|
205
207
|
lines_with_numbers.append((line_num, line))
|
|
@@ -18,6 +18,9 @@ Implementation: Iterates through sorted violations, keeps first of each overlapp
|
|
|
18
18
|
|
|
19
19
|
from src.core.types import Violation
|
|
20
20
|
|
|
21
|
+
# Default fallback for line count when parsing fails
|
|
22
|
+
DEFAULT_FALLBACK_LINE_COUNT = 5
|
|
23
|
+
|
|
21
24
|
|
|
22
25
|
class ViolationFilter:
|
|
23
26
|
"""Filters overlapping violations."""
|
|
@@ -88,4 +91,4 @@ class ViolationFilter:
|
|
|
88
91
|
end = message.index(" lines")
|
|
89
92
|
return int(message[start:end])
|
|
90
93
|
except (ValueError, IndexError):
|
|
91
|
-
return
|
|
94
|
+
return DEFAULT_FALLBACK_LINE_COUNT # Default fallback
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File: src/linters/file_header/__init__.py
|
|
3
|
+
Purpose: File header linter module initialization
|
|
4
|
+
Exports: FileHeaderRule
|
|
5
|
+
Depends: linter.FileHeaderRule
|
|
6
|
+
Implements: Module-level exports for clean API
|
|
7
|
+
Related: linter.py for main rule implementation
|
|
8
|
+
|
|
9
|
+
Overview:
|
|
10
|
+
Initializes the file header linter module providing multi-language file header
|
|
11
|
+
validation with mandatory field checking, atemporal language detection, and configuration
|
|
12
|
+
support. Main entry point for file header linting functionality.
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
from src.linters.file_header import FileHeaderRule
|
|
16
|
+
rule = FileHeaderRule()
|
|
17
|
+
violations = rule.check(context)
|
|
18
|
+
|
|
19
|
+
Notes: Follows standard Python module initialization pattern with __all__ export control
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .linter import FileHeaderRule
|
|
23
|
+
|
|
24
|
+
__all__ = ["FileHeaderRule"]
|