thailint 0.1.5__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +7 -2
- src/analyzers/__init__.py +23 -0
- src/analyzers/typescript_base.py +148 -0
- src/api.py +1 -1
- src/cli.py +1111 -144
- src/config.py +12 -33
- src/core/base.py +102 -5
- src/core/cli_utils.py +206 -0
- src/core/config_parser.py +126 -0
- src/core/linter_utils.py +168 -0
- src/core/registry.py +17 -92
- src/core/rule_discovery.py +132 -0
- src/core/violation_builder.py +122 -0
- src/linter_config/ignore.py +112 -40
- src/linter_config/loader.py +3 -13
- src/linters/dry/__init__.py +23 -0
- src/linters/dry/base_token_analyzer.py +76 -0
- src/linters/dry/block_filter.py +265 -0
- src/linters/dry/block_grouper.py +59 -0
- src/linters/dry/cache.py +172 -0
- src/linters/dry/cache_query.py +61 -0
- src/linters/dry/config.py +134 -0
- src/linters/dry/config_loader.py +44 -0
- src/linters/dry/deduplicator.py +120 -0
- src/linters/dry/duplicate_storage.py +63 -0
- src/linters/dry/file_analyzer.py +90 -0
- src/linters/dry/inline_ignore.py +140 -0
- src/linters/dry/linter.py +163 -0
- src/linters/dry/python_analyzer.py +668 -0
- src/linters/dry/storage_initializer.py +42 -0
- src/linters/dry/token_hasher.py +169 -0
- src/linters/dry/typescript_analyzer.py +592 -0
- src/linters/dry/violation_builder.py +74 -0
- src/linters/dry/violation_filter.py +94 -0
- src/linters/dry/violation_generator.py +174 -0
- src/linters/file_header/__init__.py +24 -0
- src/linters/file_header/atemporal_detector.py +87 -0
- src/linters/file_header/config.py +66 -0
- src/linters/file_header/field_validator.py +69 -0
- src/linters/file_header/linter.py +313 -0
- src/linters/file_header/python_parser.py +86 -0
- src/linters/file_header/violation_builder.py +78 -0
- src/linters/file_placement/config_loader.py +86 -0
- src/linters/file_placement/directory_matcher.py +80 -0
- src/linters/file_placement/linter.py +262 -471
- src/linters/file_placement/path_resolver.py +61 -0
- src/linters/file_placement/pattern_matcher.py +55 -0
- src/linters/file_placement/pattern_validator.py +106 -0
- src/linters/file_placement/rule_checker.py +229 -0
- src/linters/file_placement/violation_factory.py +177 -0
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +82 -0
- src/linters/magic_numbers/context_analyzer.py +247 -0
- src/linters/magic_numbers/linter.py +516 -0
- src/linters/magic_numbers/python_analyzer.py +76 -0
- src/linters/magic_numbers/typescript_analyzer.py +218 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +17 -4
- src/linters/nesting/linter.py +81 -168
- src/linters/nesting/typescript_analyzer.py +39 -102
- src/linters/nesting/typescript_function_extractor.py +130 -0
- src/linters/nesting/violation_builder.py +139 -0
- src/linters/print_statements/__init__.py +53 -0
- src/linters/print_statements/config.py +83 -0
- src/linters/print_statements/linter.py +430 -0
- src/linters/print_statements/python_analyzer.py +155 -0
- src/linters/print_statements/typescript_analyzer.py +135 -0
- src/linters/print_statements/violation_builder.py +98 -0
- src/linters/srp/__init__.py +99 -0
- src/linters/srp/class_analyzer.py +113 -0
- src/linters/srp/config.py +82 -0
- src/linters/srp/heuristics.py +89 -0
- src/linters/srp/linter.py +234 -0
- src/linters/srp/metrics_evaluator.py +47 -0
- src/linters/srp/python_analyzer.py +72 -0
- src/linters/srp/typescript_analyzer.py +75 -0
- src/linters/srp/typescript_metrics_calculator.py +90 -0
- src/linters/srp/violation_builder.py +117 -0
- src/orchestrator/core.py +54 -9
- src/templates/thailint_config_template.yaml +158 -0
- src/utils/__init__.py +4 -0
- src/utils/project_root.py +203 -0
- thailint-0.5.0.dist-info/METADATA +1286 -0
- thailint-0.5.0.dist-info/RECORD +96 -0
- {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info}/WHEEL +1 -1
- src/.ai/layout.yaml +0 -48
- thailint-0.1.5.dist-info/METADATA +0 -629
- thailint-0.1.5.dist-info/RECORD +0 -28
- {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info}/entry_points.txt +0 -0
- {thailint-0.1.5.dist-info → thailint-0.5.0.dist-info/licenses}/LICENSE +0 -0
src/linter_config/ignore.py
CHANGED
|
@@ -5,18 +5,18 @@ Scope: Multi-level ignore system across repository, directory, file, method, and
|
|
|
5
5
|
|
|
6
6
|
Overview: Implements a sophisticated ignore directive system that allows developers to suppress
|
|
7
7
|
linting violations at five different granularity levels, from entire repository patterns down
|
|
8
|
-
to individual lines of code. Repository level uses
|
|
9
|
-
glob patterns for excluding files like build artifacts and dependencies.
|
|
10
|
-
first 10 lines for ignore-file directives (performance optimization).
|
|
11
|
-
ignore-next-line directives placed before functions. Line level enables
|
|
12
|
-
at the end of code lines. All levels support rule-specific ignores
|
|
13
|
-
[rule-id] and wildcard rule matching (literals.* matches literals.magic-number).
|
|
14
|
-
should_ignore_violation() method provides unified checking across all levels, integrating
|
|
8
|
+
to individual lines of code. Repository level uses global ignore patterns from .thailint.yaml
|
|
9
|
+
with gitignore-style glob patterns for excluding files like build artifacts and dependencies.
|
|
10
|
+
File level scans the first 10 lines for ignore-file directives (performance optimization).
|
|
11
|
+
Method level supports ignore-next-line directives placed before functions. Line level enables
|
|
12
|
+
inline ignore comments at the end of code lines. All levels support rule-specific ignores
|
|
13
|
+
using bracket syntax [rule-id] and wildcard rule matching (literals.* matches literals.magic-number).
|
|
14
|
+
The should_ignore_violation() method provides unified checking across all levels, integrating
|
|
15
15
|
with the violation reporting system to filter out suppressed violations before displaying
|
|
16
16
|
results to users.
|
|
17
17
|
|
|
18
18
|
Dependencies: fnmatch for gitignore-style pattern matching, re for regex-based directive parsing,
|
|
19
|
-
pathlib for file operations, Violation type for violation checking
|
|
19
|
+
pathlib for file operations, Violation type for violation checking, yaml for config loading
|
|
20
20
|
|
|
21
21
|
Exports: IgnoreDirectiveParser class
|
|
22
22
|
|
|
@@ -25,9 +25,9 @@ Interfaces: is_ignored(file_path: Path) -> bool for repo-level checking,
|
|
|
25
25
|
has_line_ignore(code: str, line_num: int, rule_id: str | None) -> bool for line-level,
|
|
26
26
|
should_ignore_violation(violation: Violation, file_content: str) -> bool for unified checking
|
|
27
27
|
|
|
28
|
-
Implementation: Gitignore-style pattern matching with fnmatch,
|
|
29
|
-
performance, regex-based directive parsing with rule ID extraction,
|
|
30
|
-
with prefix comparison, graceful error handling for malformed directives
|
|
28
|
+
Implementation: Gitignore-style pattern matching with fnmatch, YAML config loading for global patterns,
|
|
29
|
+
first-10-lines scanning for performance, regex-based directive parsing with rule ID extraction,
|
|
30
|
+
wildcard rule matching with prefix comparison, graceful error handling for malformed directives
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
import fnmatch
|
|
@@ -35,6 +35,8 @@ import re
|
|
|
35
35
|
from pathlib import Path
|
|
36
36
|
from typing import TYPE_CHECKING
|
|
37
37
|
|
|
38
|
+
import yaml
|
|
39
|
+
|
|
38
40
|
if TYPE_CHECKING:
|
|
39
41
|
from src.core.types import Violation
|
|
40
42
|
|
|
@@ -56,22 +58,58 @@ class IgnoreDirectiveParser:
|
|
|
56
58
|
self.repo_patterns = self._load_repo_ignores()
|
|
57
59
|
|
|
58
60
|
def _load_repo_ignores(self) -> list[str]:
|
|
59
|
-
"""Load .thailintignore
|
|
61
|
+
"""Load global ignore patterns from .thailintignore or .thailint.yaml."""
|
|
62
|
+
# First, try to load from .thailintignore (gitignore-style)
|
|
63
|
+
thailintignore = self.project_root / ".thailintignore"
|
|
64
|
+
if thailintignore.exists():
|
|
65
|
+
return self._parse_thailintignore_file(thailintignore)
|
|
66
|
+
|
|
67
|
+
# Fall back to .thailint.yaml
|
|
68
|
+
config_file = self.project_root / ".thailint.yaml"
|
|
69
|
+
if config_file.exists():
|
|
70
|
+
return self._parse_config_file(config_file)
|
|
71
|
+
|
|
72
|
+
return []
|
|
73
|
+
|
|
74
|
+
def _parse_thailintignore_file(self, ignore_file: Path) -> list[str]:
|
|
75
|
+
"""Parse .thailintignore file (gitignore-style).
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
ignore_file: Path to .thailintignore file
|
|
60
79
|
|
|
61
80
|
Returns:
|
|
62
|
-
List of
|
|
81
|
+
List of ignore patterns
|
|
63
82
|
"""
|
|
64
|
-
|
|
65
|
-
|
|
83
|
+
try:
|
|
84
|
+
content = ignore_file.read_text(encoding="utf-8")
|
|
85
|
+
patterns = []
|
|
86
|
+
for line in content.splitlines():
|
|
87
|
+
line = line.strip()
|
|
88
|
+
# Skip empty lines and comments
|
|
89
|
+
if line and not line.startswith("#"):
|
|
90
|
+
patterns.append(line)
|
|
91
|
+
return patterns
|
|
92
|
+
except (OSError, UnicodeDecodeError):
|
|
66
93
|
return []
|
|
67
94
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
95
|
+
def _parse_config_file(self, config_file: Path) -> list[str]:
|
|
96
|
+
"""Parse YAML config file and extract ignore patterns."""
|
|
97
|
+
try:
|
|
98
|
+
config = yaml.safe_load(config_file.read_text(encoding="utf-8"))
|
|
99
|
+
return self._extract_ignore_patterns(config)
|
|
100
|
+
except (yaml.YAMLError, OSError, UnicodeDecodeError):
|
|
101
|
+
return []
|
|
102
|
+
|
|
103
|
+
@staticmethod
|
|
104
|
+
def _extract_ignore_patterns(config: dict | None) -> list[str]:
|
|
105
|
+
"""Extract ignore patterns from config dict."""
|
|
106
|
+
if not config or not isinstance(config, dict):
|
|
107
|
+
return []
|
|
108
|
+
|
|
109
|
+
ignore_patterns = config.get("ignore", [])
|
|
110
|
+
if isinstance(ignore_patterns, list):
|
|
111
|
+
return [str(pattern) for pattern in ignore_patterns]
|
|
112
|
+
return []
|
|
75
113
|
|
|
76
114
|
def is_ignored(self, file_path: Path) -> bool:
|
|
77
115
|
"""Check if file matches repository-level ignore patterns.
|
|
@@ -122,13 +160,33 @@ class IgnoreDirectiveParser:
|
|
|
122
160
|
|
|
123
161
|
def _has_ignore_directive_marker(self, line: str) -> bool:
|
|
124
162
|
"""Check if line contains an ignore directive marker."""
|
|
125
|
-
|
|
163
|
+
line_lower = line.lower()
|
|
164
|
+
return "# thailint: ignore-file" in line_lower or "# design-lint: ignore-file" in line_lower
|
|
126
165
|
|
|
127
166
|
def _check_specific_rule_ignore(self, line: str, rule_id: str) -> bool:
|
|
128
167
|
"""Check if line ignores a specific rule."""
|
|
129
|
-
|
|
130
|
-
if
|
|
131
|
-
|
|
168
|
+
# Check for bracket syntax: # thailint: ignore-file[rule1, rule2]
|
|
169
|
+
if self._check_bracket_syntax_file_ignore(line, rule_id):
|
|
170
|
+
return True
|
|
171
|
+
|
|
172
|
+
# Check for space-separated syntax: # thailint: ignore-file rule1 rule2
|
|
173
|
+
return self._check_space_syntax_file_ignore(line, rule_id)
|
|
174
|
+
|
|
175
|
+
def _check_bracket_syntax_file_ignore(self, line: str, rule_id: str) -> bool:
|
|
176
|
+
"""Check bracket syntax for file-level ignore."""
|
|
177
|
+
bracket_match = re.search(r"ignore-file\[([^\]]+)\]", line, re.IGNORECASE)
|
|
178
|
+
if bracket_match:
|
|
179
|
+
ignored_rules = [r.strip() for r in bracket_match.group(1).split(",")]
|
|
180
|
+
return any(self._rule_matches(rule_id, r) for r in ignored_rules)
|
|
181
|
+
return False
|
|
182
|
+
|
|
183
|
+
def _check_space_syntax_file_ignore(self, line: str, rule_id: str) -> bool:
|
|
184
|
+
"""Check space-separated syntax for file-level ignore."""
|
|
185
|
+
space_match = re.search(r"ignore-file\s+([^\s#]+(?:\s+[^\s#]+)*)", line, re.IGNORECASE)
|
|
186
|
+
if space_match:
|
|
187
|
+
ignored_rules = [
|
|
188
|
+
r.strip() for r in re.split(r"[,\s]+", space_match.group(1)) if r.strip()
|
|
189
|
+
]
|
|
132
190
|
return any(self._rule_matches(rule_id, r) for r in ignored_rules)
|
|
133
191
|
return False
|
|
134
192
|
|
|
@@ -171,27 +229,28 @@ class IgnoreDirectiveParser:
|
|
|
171
229
|
|
|
172
230
|
def _has_line_ignore_marker(self, code: str) -> bool:
|
|
173
231
|
"""Check if code line has ignore marker."""
|
|
232
|
+
code_lower = code.lower()
|
|
174
233
|
return (
|
|
175
|
-
"# thailint: ignore" in
|
|
176
|
-
or "# design-lint: ignore" in
|
|
177
|
-
or "// thailint: ignore" in
|
|
178
|
-
or "// design-lint: ignore" in
|
|
234
|
+
"# thailint: ignore" in code_lower
|
|
235
|
+
or "# design-lint: ignore" in code_lower
|
|
236
|
+
or "// thailint: ignore" in code_lower
|
|
237
|
+
or "// design-lint: ignore" in code_lower
|
|
179
238
|
)
|
|
180
239
|
|
|
181
240
|
def _check_specific_rule_in_line(self, code: str, rule_id: str) -> bool:
|
|
182
241
|
"""Check if line's ignore directive matches specific rule."""
|
|
183
242
|
# Check for bracket syntax: # thailint: ignore[rule1, rule2]
|
|
184
|
-
bracket_match = re.search(r"ignore\[([^\]]+)\]", code)
|
|
243
|
+
bracket_match = re.search(r"ignore\[([^\]]+)\]", code, re.IGNORECASE)
|
|
185
244
|
if bracket_match:
|
|
186
245
|
return self._check_bracket_rules(bracket_match.group(1), rule_id)
|
|
187
246
|
|
|
188
247
|
# Check for space-separated syntax: # thailint: ignore rule1 rule2
|
|
189
|
-
space_match = re.search(r"ignore\s+([^\s#]+(?:\s+[^\s#]+)*)", code)
|
|
248
|
+
space_match = re.search(r"ignore\s+([^\s#]+(?:\s+[^\s#]+)*)", code, re.IGNORECASE)
|
|
190
249
|
if space_match:
|
|
191
250
|
return self._check_space_separated_rules(space_match.group(1), rule_id)
|
|
192
251
|
|
|
193
252
|
# No specific rules - check for "ignore-all"
|
|
194
|
-
return "ignore-all" in code
|
|
253
|
+
return "ignore-all" in code.lower()
|
|
195
254
|
|
|
196
255
|
def _check_bracket_rules(self, rules_text: str, rule_id: str) -> bool:
|
|
197
256
|
"""Check if bracketed rules match the rule ID."""
|
|
@@ -231,17 +290,21 @@ class IgnoreDirectiveParser:
|
|
|
231
290
|
Returns:
|
|
232
291
|
True if rule matches pattern.
|
|
233
292
|
"""
|
|
234
|
-
|
|
293
|
+
# Case-insensitive comparison
|
|
294
|
+
rule_id_lower = rule_id.lower()
|
|
295
|
+
pattern_lower = pattern.lower()
|
|
296
|
+
|
|
297
|
+
if pattern_lower.endswith("*"):
|
|
235
298
|
# Wildcard match: literals.* matches literals.magic-number
|
|
236
|
-
prefix =
|
|
237
|
-
return
|
|
299
|
+
prefix = pattern_lower[:-1]
|
|
300
|
+
return rule_id_lower.startswith(prefix)
|
|
238
301
|
|
|
239
302
|
# Exact match
|
|
240
|
-
if
|
|
303
|
+
if rule_id_lower == pattern_lower:
|
|
241
304
|
return True
|
|
242
305
|
|
|
243
306
|
# Prefix match: "nesting" matches "nesting.excessive-depth"
|
|
244
|
-
if
|
|
307
|
+
if rule_id_lower.startswith(pattern_lower + "."):
|
|
245
308
|
return True
|
|
246
309
|
|
|
247
310
|
return False
|
|
@@ -293,18 +356,27 @@ class IgnoreDirectiveParser:
|
|
|
293
356
|
file_path = Path(violation.file_path)
|
|
294
357
|
|
|
295
358
|
# Repository and file level checks
|
|
296
|
-
if self._is_ignored_at_file_level(file_path, violation.rule_id):
|
|
359
|
+
if self._is_ignored_at_file_level(file_path, violation.rule_id, file_content):
|
|
297
360
|
return True
|
|
298
361
|
|
|
299
362
|
# Line-based checks
|
|
300
363
|
return self._is_ignored_in_content(file_content, violation)
|
|
301
364
|
|
|
302
|
-
def _is_ignored_at_file_level(self, file_path: Path, rule_id: str) -> bool:
|
|
365
|
+
def _is_ignored_at_file_level(self, file_path: Path, rule_id: str, file_content: str) -> bool:
|
|
303
366
|
"""Check repository and file level ignores."""
|
|
304
367
|
if self.is_ignored(file_path):
|
|
305
368
|
return True
|
|
369
|
+
# Check content first (for tests with in-memory content)
|
|
370
|
+
if self._has_file_ignore_in_content(file_content, rule_id):
|
|
371
|
+
return True
|
|
372
|
+
# Fall back to reading from disk if file exists
|
|
306
373
|
return self.has_file_ignore(file_path, rule_id)
|
|
307
374
|
|
|
375
|
+
def _has_file_ignore_in_content(self, file_content: str, rule_id: str | None) -> bool:
|
|
376
|
+
"""Check if file content has ignore-file directive."""
|
|
377
|
+
lines = file_content.splitlines()[:10] # Check first 10 lines
|
|
378
|
+
return any(self._check_line_for_ignore(line, rule_id) for line in lines)
|
|
379
|
+
|
|
308
380
|
def _is_ignored_in_content(self, file_content: str, violation: "Violation") -> bool:
|
|
309
381
|
"""Check content-based ignores (block, line, method level)."""
|
|
310
382
|
lines = file_content.splitlines()
|
src/linter_config/loader.py
CHANGED
|
@@ -25,11 +25,10 @@ Implementation: Extension-based format detection (.yaml/.yml vs .json), yaml.saf
|
|
|
25
25
|
for security, empty dict handling for null YAML, ValueError for unsupported formats
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
import json
|
|
29
28
|
from pathlib import Path
|
|
30
29
|
from typing import Any
|
|
31
30
|
|
|
32
|
-
import
|
|
31
|
+
from src.core.config_parser import parse_config_file
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
class LinterConfigLoader:
|
|
@@ -49,21 +48,12 @@ class LinterConfigLoader:
|
|
|
49
48
|
Configuration dictionary.
|
|
50
49
|
|
|
51
50
|
Raises:
|
|
52
|
-
|
|
53
|
-
yaml.YAMLError: If YAML is malformed.
|
|
54
|
-
json.JSONDecodeError: If JSON is malformed.
|
|
51
|
+
ConfigParseError: If file format is unsupported or parsing fails.
|
|
55
52
|
"""
|
|
56
53
|
if not config_path.exists():
|
|
57
54
|
return self.get_defaults()
|
|
58
55
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
with config_path.open(encoding="utf-8") as f:
|
|
62
|
-
if suffix in [".yaml", ".yml"]:
|
|
63
|
-
return yaml.safe_load(f) or {}
|
|
64
|
-
if suffix == ".json":
|
|
65
|
-
return json.load(f)
|
|
66
|
-
raise ValueError(f"Unsupported config format: {suffix}")
|
|
56
|
+
return parse_config_file(config_path)
|
|
67
57
|
|
|
68
58
|
def get_defaults(self) -> dict[str, Any]:
|
|
69
59
|
"""Get default configuration.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: DRY (Don't Repeat Yourself) linter module exports
|
|
3
|
+
|
|
4
|
+
Scope: Module-level exports for DRY linter components
|
|
5
|
+
|
|
6
|
+
Overview: Provides centralized exports for the DRY linter module components. Exposes the main
|
|
7
|
+
DRYRule class for duplicate code detection, configuration dataclass, and analyzer components.
|
|
8
|
+
Simplifies imports for consumers by providing a single import point for all DRY linter
|
|
9
|
+
functionality. Follows the established pattern from nesting and SRP linters.
|
|
10
|
+
|
|
11
|
+
Dependencies: linter.DRYRule, config.DRYConfig
|
|
12
|
+
|
|
13
|
+
Exports: DRYRule (main rule class), DRYConfig (configuration)
|
|
14
|
+
|
|
15
|
+
Interfaces: Module-level __all__ list defining public API
|
|
16
|
+
|
|
17
|
+
Implementation: Standard Python module with explicit exports via __all__
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .config import DRYConfig
|
|
21
|
+
from .linter import DRYRule
|
|
22
|
+
|
|
23
|
+
__all__ = ["DRYRule", "DRYConfig"]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Base class for token-based duplicate code analysis
|
|
3
|
+
|
|
4
|
+
Scope: Common duplicate detection workflow for Python and TypeScript analyzers
|
|
5
|
+
|
|
6
|
+
Overview: Provides shared infrastructure for token-based duplicate code detection across different
|
|
7
|
+
programming languages. Implements common workflow of tokenization, rolling hash window generation,
|
|
8
|
+
and CodeBlock creation. Subclasses provide language-specific filtering (e.g., interface filtering
|
|
9
|
+
for TypeScript). Eliminates duplication between PythonDuplicateAnalyzer and TypeScriptDuplicateAnalyzer
|
|
10
|
+
by extracting shared analyze() method pattern and CodeBlock creation logic.
|
|
11
|
+
|
|
12
|
+
Dependencies: TokenHasher, CodeBlock, DRYConfig, pathlib.Path
|
|
13
|
+
|
|
14
|
+
Exports: BaseTokenAnalyzer class
|
|
15
|
+
|
|
16
|
+
Interfaces: BaseTokenAnalyzer.analyze(file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]
|
|
17
|
+
|
|
18
|
+
Implementation: Template method pattern with extension point for language-specific block filtering
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from .cache import CodeBlock
|
|
24
|
+
from .config import DRYConfig
|
|
25
|
+
from .token_hasher import TokenHasher
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BaseTokenAnalyzer:
|
|
29
|
+
"""Base analyzer for token-based duplicate detection."""
|
|
30
|
+
|
|
31
|
+
def __init__(self) -> None:
|
|
32
|
+
"""Initialize analyzer with token hasher."""
|
|
33
|
+
self._hasher = TokenHasher()
|
|
34
|
+
|
|
35
|
+
def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
|
|
36
|
+
"""Analyze file for duplicate code blocks.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
file_path: Path to source file
|
|
40
|
+
content: File content
|
|
41
|
+
config: DRY configuration
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
List of CodeBlock instances with hash values
|
|
45
|
+
"""
|
|
46
|
+
lines = self._hasher.tokenize(content)
|
|
47
|
+
windows = self._hasher.rolling_hash(lines, config.min_duplicate_lines)
|
|
48
|
+
|
|
49
|
+
blocks = []
|
|
50
|
+
for hash_val, start_line, end_line, snippet in windows:
|
|
51
|
+
if self._should_include_block(content, start_line, end_line):
|
|
52
|
+
block = CodeBlock(
|
|
53
|
+
file_path=file_path,
|
|
54
|
+
start_line=start_line,
|
|
55
|
+
end_line=end_line,
|
|
56
|
+
snippet=snippet,
|
|
57
|
+
hash_value=hash_val,
|
|
58
|
+
)
|
|
59
|
+
blocks.append(block)
|
|
60
|
+
|
|
61
|
+
return blocks
|
|
62
|
+
|
|
63
|
+
def _should_include_block(self, content: str, start_line: int, end_line: int) -> bool:
|
|
64
|
+
"""Determine if block should be included.
|
|
65
|
+
|
|
66
|
+
Extension point for language-specific filtering.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
content: File content
|
|
70
|
+
start_line: Block start line
|
|
71
|
+
end_line: Block end line
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
True if block should be included, False to filter out
|
|
75
|
+
"""
|
|
76
|
+
return True
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Extensible filter system for DRY duplicate detection
|
|
3
|
+
|
|
4
|
+
Scope: Filters out false positive duplications (API boilerplate, keyword arguments, etc.)
|
|
5
|
+
|
|
6
|
+
Overview: Provides an extensible architecture for filtering duplicate code blocks that are
|
|
7
|
+
not meaningful duplications. Includes base filter interface and built-in filters for
|
|
8
|
+
common false positive patterns like keyword-only function arguments, import groups,
|
|
9
|
+
and API call boilerplate. New filters can be added by subclassing BaseBlockFilter.
|
|
10
|
+
|
|
11
|
+
Dependencies: ast, re, typing
|
|
12
|
+
|
|
13
|
+
Exports: BaseBlockFilter, BlockFilterRegistry, KeywordArgumentFilter, ImportGroupFilter
|
|
14
|
+
|
|
15
|
+
Interfaces: BaseBlockFilter.should_filter(code_block, file_content) -> bool
|
|
16
|
+
|
|
17
|
+
Implementation: Strategy pattern with filter registry for extensibility
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import ast
|
|
21
|
+
import re
|
|
22
|
+
from abc import ABC, abstractmethod
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Protocol
|
|
25
|
+
|
|
26
|
+
# Default filter threshold constants
|
|
27
|
+
DEFAULT_KEYWORD_ARG_THRESHOLD = 0.8
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class CodeBlock(Protocol):
|
|
31
|
+
"""Protocol for code blocks (matches cache.CodeBlock)."""
|
|
32
|
+
|
|
33
|
+
file_path: Path
|
|
34
|
+
start_line: int
|
|
35
|
+
end_line: int
|
|
36
|
+
snippet: str
|
|
37
|
+
hash_value: int
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class BaseBlockFilter(ABC):
|
|
41
|
+
"""Base class for duplicate block filters."""
|
|
42
|
+
|
|
43
|
+
@abstractmethod
|
|
44
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
45
|
+
"""Determine if a code block should be filtered out.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
block: Code block to evaluate
|
|
49
|
+
file_content: Full file content for context
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
True if block should be filtered (not reported as duplicate)
|
|
53
|
+
"""
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
@abstractmethod
|
|
57
|
+
def get_name(self) -> str:
|
|
58
|
+
"""Get filter name for configuration and logging."""
|
|
59
|
+
pass
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class KeywordArgumentFilter(BaseBlockFilter):
|
|
63
|
+
"""Filters blocks that are primarily keyword arguments in function calls.
|
|
64
|
+
|
|
65
|
+
Detects patterns like:
|
|
66
|
+
message=message,
|
|
67
|
+
severity=Severity.ERROR,
|
|
68
|
+
suggestion=suggestion,
|
|
69
|
+
|
|
70
|
+
These are common in builder patterns and API calls.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, threshold: float = DEFAULT_KEYWORD_ARG_THRESHOLD):
|
|
74
|
+
"""Initialize filter.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
threshold: Minimum percentage of lines that must be keyword args (0.0-1.0)
|
|
78
|
+
"""
|
|
79
|
+
self.threshold = threshold
|
|
80
|
+
# Pattern: optional whitespace, identifier, =, value, optional comma
|
|
81
|
+
self._kwarg_pattern = re.compile(r"^\s*\w+\s*=\s*.+,?\s*$")
|
|
82
|
+
|
|
83
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
84
|
+
"""Check if block is primarily keyword arguments.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
block: Code block to evaluate
|
|
88
|
+
file_content: Full file content for context
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
True if block should be filtered
|
|
92
|
+
"""
|
|
93
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
94
|
+
|
|
95
|
+
if not lines:
|
|
96
|
+
return False
|
|
97
|
+
|
|
98
|
+
# Count lines that match keyword argument pattern
|
|
99
|
+
kwarg_lines = sum(1 for line in lines if self._kwarg_pattern.match(line))
|
|
100
|
+
|
|
101
|
+
# Filter if most lines are keyword arguments
|
|
102
|
+
ratio = kwarg_lines / len(lines)
|
|
103
|
+
if ratio >= self.threshold:
|
|
104
|
+
return self._is_inside_function_call(block, file_content)
|
|
105
|
+
|
|
106
|
+
return False
|
|
107
|
+
|
|
108
|
+
def _is_inside_function_call(self, block: CodeBlock, file_content: str) -> bool:
|
|
109
|
+
"""Verify the block is inside a function call, not standalone code."""
|
|
110
|
+
try:
|
|
111
|
+
tree = ast.parse(file_content)
|
|
112
|
+
except SyntaxError:
|
|
113
|
+
return False
|
|
114
|
+
|
|
115
|
+
# Find if any Call node contains the block
|
|
116
|
+
for node in ast.walk(tree):
|
|
117
|
+
if isinstance(node, ast.Call) and self._check_multiline_containment(node, block):
|
|
118
|
+
return True
|
|
119
|
+
return False
|
|
120
|
+
|
|
121
|
+
@staticmethod
|
|
122
|
+
def _check_multiline_containment(node: ast.Call, block: CodeBlock) -> bool:
|
|
123
|
+
"""Check if Call node is multiline and contains block."""
|
|
124
|
+
if not KeywordArgumentFilter._has_valid_line_info(node):
|
|
125
|
+
return False
|
|
126
|
+
|
|
127
|
+
# After validation, these are guaranteed to be non-None integers
|
|
128
|
+
# Use type: ignore to suppress MyPy's inability to understand runtime validation
|
|
129
|
+
is_multiline = node.lineno < node.end_lineno # type: ignore[operator]
|
|
130
|
+
contains_block = (
|
|
131
|
+
node.lineno <= block.start_line and node.end_lineno >= block.end_line # type: ignore[operator]
|
|
132
|
+
)
|
|
133
|
+
return is_multiline and contains_block
|
|
134
|
+
|
|
135
|
+
@staticmethod
|
|
136
|
+
def _has_valid_line_info(node: ast.Call) -> bool:
|
|
137
|
+
"""Check if node has valid line information.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
node: AST Call node to check
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
True if node has valid line number attributes
|
|
144
|
+
"""
|
|
145
|
+
if not hasattr(node, "lineno"):
|
|
146
|
+
return False
|
|
147
|
+
if not hasattr(node, "end_lineno"):
|
|
148
|
+
return False
|
|
149
|
+
if node.lineno is None:
|
|
150
|
+
return False
|
|
151
|
+
if node.end_lineno is None:
|
|
152
|
+
return False
|
|
153
|
+
return True
|
|
154
|
+
|
|
155
|
+
def get_name(self) -> str:
|
|
156
|
+
"""Get filter name."""
|
|
157
|
+
return "keyword_argument_filter"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class ImportGroupFilter(BaseBlockFilter):
|
|
161
|
+
"""Filters blocks that are just import statements.
|
|
162
|
+
|
|
163
|
+
Import organization often creates similar patterns that aren't meaningful duplication.
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
167
|
+
"""Check if block is only import statements.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
block: Code block to evaluate
|
|
171
|
+
file_content: Full file content
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
True if block should be filtered
|
|
175
|
+
"""
|
|
176
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
177
|
+
|
|
178
|
+
for line in lines:
|
|
179
|
+
stripped = line.strip()
|
|
180
|
+
if not stripped:
|
|
181
|
+
continue
|
|
182
|
+
if not (stripped.startswith("import ") or stripped.startswith("from ")):
|
|
183
|
+
return False
|
|
184
|
+
|
|
185
|
+
return True
|
|
186
|
+
|
|
187
|
+
def get_name(self) -> str:
|
|
188
|
+
"""Get filter name."""
|
|
189
|
+
return "import_group_filter"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class BlockFilterRegistry:
|
|
193
|
+
"""Registry for managing duplicate block filters."""
|
|
194
|
+
|
|
195
|
+
def __init__(self) -> None:
|
|
196
|
+
"""Initialize empty registry."""
|
|
197
|
+
self._filters: list[BaseBlockFilter] = []
|
|
198
|
+
self._enabled_filters: set[str] = set()
|
|
199
|
+
|
|
200
|
+
def register(self, filter_instance: BaseBlockFilter) -> None:
|
|
201
|
+
"""Register a filter.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
filter_instance: Filter to register
|
|
205
|
+
"""
|
|
206
|
+
self._filters.append(filter_instance)
|
|
207
|
+
self._enabled_filters.add(filter_instance.get_name())
|
|
208
|
+
|
|
209
|
+
def enable_filter(self, filter_name: str) -> None:
|
|
210
|
+
"""Enable a specific filter by name.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
filter_name: Name of filter to enable
|
|
214
|
+
"""
|
|
215
|
+
self._enabled_filters.add(filter_name)
|
|
216
|
+
|
|
217
|
+
def disable_filter(self, filter_name: str) -> None:
|
|
218
|
+
"""Disable a specific filter by name.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
filter_name: Name of filter to disable
|
|
222
|
+
"""
|
|
223
|
+
self._enabled_filters.discard(filter_name)
|
|
224
|
+
|
|
225
|
+
def should_filter_block(self, block: CodeBlock, file_content: str) -> bool:
|
|
226
|
+
"""Check if any enabled filter wants to filter this block.
|
|
227
|
+
|
|
228
|
+
Args:
|
|
229
|
+
block: Code block to evaluate
|
|
230
|
+
file_content: Full file content
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
True if block should be filtered out
|
|
234
|
+
"""
|
|
235
|
+
for filter_instance in self._filters:
|
|
236
|
+
if filter_instance.get_name() not in self._enabled_filters:
|
|
237
|
+
continue
|
|
238
|
+
|
|
239
|
+
if filter_instance.should_filter(block, file_content):
|
|
240
|
+
return True
|
|
241
|
+
|
|
242
|
+
return False
|
|
243
|
+
|
|
244
|
+
def get_enabled_filters(self) -> list[str]:
|
|
245
|
+
"""Get list of enabled filter names.
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
List of enabled filter names
|
|
249
|
+
"""
|
|
250
|
+
return sorted(self._enabled_filters)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def create_default_registry() -> BlockFilterRegistry:
|
|
254
|
+
"""Create registry with default filters.
|
|
255
|
+
|
|
256
|
+
Returns:
|
|
257
|
+
BlockFilterRegistry with common filters registered
|
|
258
|
+
"""
|
|
259
|
+
registry = BlockFilterRegistry()
|
|
260
|
+
|
|
261
|
+
# Register built-in filters
|
|
262
|
+
registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
|
|
263
|
+
registry.register(ImportGroupFilter())
|
|
264
|
+
|
|
265
|
+
return registry
|