thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +38 -25
- src/core/base.py +7 -2
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +5 -2
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +120 -20
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +104 -10
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +54 -11
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +5 -4
- src/linters/dry/file_analyzer.py +4 -2
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +183 -48
- src/linters/dry/python_analyzer.py +60 -439
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/token_hasher.py +116 -112
- src/linters/dry/typescript_analyzer.py +68 -382
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +5 -4
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/atemporal_detector.py +68 -50
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +90 -16
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +36 -33
- src/linters/file_header/linter.py +140 -144
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +14 -58
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +13 -12
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +66 -34
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/context_analyzer.py +227 -225
- src/linters/magic_numbers/linter.py +28 -82
- src/linters/magic_numbers/python_analyzer.py +4 -16
- src/linters/magic_numbers/typescript_analyzer.py +9 -12
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/linter.py +24 -16
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -12
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/config.py +7 -12
- src/linters/print_statements/linter.py +26 -43
- src/linters/print_statements/python_analyzer.py +91 -93
- src/linters/print_statements/typescript_analyzer.py +15 -25
- src/linters/print_statements/violation_builder.py +12 -14
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +15 -16
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +252 -14
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +196 -0
- src/utils/project_root.py +3 -0
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1665
- thailint-0.5.0.dist-info/METADATA +0 -1286
- thailint-0.5.0.dist-info/RECORD +0 -96
- thailint-0.5.0.dist-info/entry_points.txt +0 -4
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Builds Violation objects from CQSPattern instances
|
|
3
|
+
|
|
4
|
+
Scope: Violation message formatting and suggestion generation for CQS violations
|
|
5
|
+
|
|
6
|
+
Overview: Provides build_cqs_violation function that converts a CQSPattern with a detected
|
|
7
|
+
CQS violation into a Violation object with properly formatted message. Message includes
|
|
8
|
+
function name (with class prefix for methods), lists INPUT operations with line numbers,
|
|
9
|
+
lists OUTPUT operations with line numbers, and provides actionable suggestion to split
|
|
10
|
+
the function into separate query and command functions.
|
|
11
|
+
|
|
12
|
+
Dependencies: CQSPattern, InputOperation, OutputOperation, Violation, Severity
|
|
13
|
+
|
|
14
|
+
Exports: build_cqs_violation
|
|
15
|
+
|
|
16
|
+
Interfaces: build_cqs_violation(pattern: CQSPattern) -> Violation
|
|
17
|
+
|
|
18
|
+
Implementation: String formatting with INPUT/OUTPUT line number aggregation
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from src.core.types import Severity, Violation
|
|
22
|
+
|
|
23
|
+
from .types import CQSPattern
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _format_inputs(pattern: CQSPattern) -> str:
|
|
27
|
+
"""Format INPUT operations for violation message.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
pattern: CQSPattern containing inputs
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
Formatted string listing INPUTs with line numbers
|
|
34
|
+
"""
|
|
35
|
+
if not pattern.inputs:
|
|
36
|
+
return ""
|
|
37
|
+
|
|
38
|
+
parts = [f"Line {inp.line}: {inp.target} = {inp.expression}" for inp in pattern.inputs]
|
|
39
|
+
return "; ".join(parts)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _format_outputs(pattern: CQSPattern) -> str:
|
|
43
|
+
"""Format OUTPUT operations for violation message.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
pattern: CQSPattern containing outputs
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Formatted string listing OUTPUTs with line numbers
|
|
50
|
+
"""
|
|
51
|
+
if not pattern.outputs:
|
|
52
|
+
return ""
|
|
53
|
+
|
|
54
|
+
parts = [f"Line {out.line}: {out.expression}" for out in pattern.outputs]
|
|
55
|
+
return "; ".join(parts)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def build_cqs_violation(pattern: CQSPattern) -> Violation:
|
|
59
|
+
"""Build a Violation object from a CQSPattern.
|
|
60
|
+
|
|
61
|
+
Creates a violation message that includes:
|
|
62
|
+
- Function name (with class prefix for methods)
|
|
63
|
+
- List of INPUT operations with line numbers
|
|
64
|
+
- List of OUTPUT operations with line numbers
|
|
65
|
+
- Suggestion to split into query and command functions
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
pattern: CQSPattern representing a function that violates CQS
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
Violation object with formatted message and suggestion
|
|
72
|
+
"""
|
|
73
|
+
full_name = pattern.get_full_name()
|
|
74
|
+
|
|
75
|
+
# Build detailed message
|
|
76
|
+
inputs_str = _format_inputs(pattern)
|
|
77
|
+
outputs_str = _format_outputs(pattern)
|
|
78
|
+
|
|
79
|
+
message = (
|
|
80
|
+
f"Function '{full_name}' violates CQS: mixes queries and commands. "
|
|
81
|
+
f"INPUTs: {inputs_str}. OUTPUTs: {outputs_str}."
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
suggestion = "Split into separate query and command functions."
|
|
85
|
+
|
|
86
|
+
return Violation(
|
|
87
|
+
rule_id="cqs",
|
|
88
|
+
file_path=pattern.file_path,
|
|
89
|
+
line=pattern.line,
|
|
90
|
+
column=pattern.column,
|
|
91
|
+
message=message,
|
|
92
|
+
severity=Severity.ERROR,
|
|
93
|
+
suggestion=suggestion,
|
|
94
|
+
)
|
|
@@ -9,28 +9,35 @@ Overview: Provides shared infrastructure for token-based duplicate code detectio
|
|
|
9
9
|
for TypeScript). Eliminates duplication between PythonDuplicateAnalyzer and TypeScriptDuplicateAnalyzer
|
|
10
10
|
by extracting shared analyze() method pattern and CodeBlock creation logic.
|
|
11
11
|
|
|
12
|
-
Dependencies:
|
|
12
|
+
Dependencies: token_hasher module functions, CodeBlock, DRYConfig, pathlib.Path
|
|
13
13
|
|
|
14
14
|
Exports: BaseTokenAnalyzer class
|
|
15
15
|
|
|
16
16
|
Interfaces: BaseTokenAnalyzer.analyze(file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]
|
|
17
17
|
|
|
18
18
|
Implementation: Template method pattern with extension point for language-specific block filtering
|
|
19
|
+
|
|
20
|
+
Suppressions:
|
|
21
|
+
- stateless-class: BaseTokenAnalyzer is an intentional template method base class.
|
|
22
|
+
Subclasses (PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer) override
|
|
23
|
+
_should_include_block for language-specific filtering. Statelessness is by design
|
|
24
|
+
since state was moved to module-level functions in token_hasher.
|
|
19
25
|
"""
|
|
20
26
|
|
|
21
27
|
from pathlib import Path
|
|
22
28
|
|
|
29
|
+
from . import token_hasher
|
|
23
30
|
from .cache import CodeBlock
|
|
24
31
|
from .config import DRYConfig
|
|
25
|
-
from .token_hasher import TokenHasher
|
|
26
32
|
|
|
27
33
|
|
|
28
|
-
class BaseTokenAnalyzer:
|
|
29
|
-
"""Base analyzer for token-based duplicate detection.
|
|
34
|
+
class BaseTokenAnalyzer: # thailint: ignore[stateless-class] - Template method base class for inheritance
|
|
35
|
+
"""Base analyzer for token-based duplicate detection.
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
37
|
+
This is intentionally a base class for polymorphism. Subclasses
|
|
38
|
+
(PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer) override
|
|
39
|
+
_should_include_block for language-specific filtering.
|
|
40
|
+
"""
|
|
34
41
|
|
|
35
42
|
def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
|
|
36
43
|
"""Analyze file for duplicate code blocks.
|
|
@@ -43,8 +50,8 @@ class BaseTokenAnalyzer:
|
|
|
43
50
|
Returns:
|
|
44
51
|
List of CodeBlock instances with hash values
|
|
45
52
|
"""
|
|
46
|
-
lines =
|
|
47
|
-
windows =
|
|
53
|
+
lines = token_hasher.tokenize(content)
|
|
54
|
+
windows = token_hasher.rolling_hash(lines, config.min_duplicate_lines)
|
|
48
55
|
|
|
49
56
|
blocks = []
|
|
50
57
|
for hash_val, start_line, end_line, snippet in windows:
|
src/linters/dry/block_filter.py
CHANGED
|
@@ -10,11 +10,15 @@ Overview: Provides an extensible architecture for filtering duplicate code block
|
|
|
10
10
|
|
|
11
11
|
Dependencies: ast, re, typing
|
|
12
12
|
|
|
13
|
-
Exports: BaseBlockFilter, BlockFilterRegistry, KeywordArgumentFilter, ImportGroupFilter
|
|
13
|
+
Exports: BaseBlockFilter, BlockFilterRegistry, KeywordArgumentFilter, ImportGroupFilter,
|
|
14
|
+
LoggerCallFilter, ExceptionReraiseFilter
|
|
14
15
|
|
|
15
16
|
Interfaces: BaseBlockFilter.should_filter(code_block, file_content) -> bool
|
|
16
17
|
|
|
17
18
|
Implementation: Strategy pattern with filter registry for extensibility
|
|
19
|
+
|
|
20
|
+
Suppressions:
|
|
21
|
+
- type:ignore[operator]: Tree-sitter Node comparison operations (optional dependency)
|
|
18
22
|
"""
|
|
19
23
|
|
|
20
24
|
import ast
|
|
@@ -53,9 +57,10 @@ class BaseBlockFilter(ABC):
|
|
|
53
57
|
"""
|
|
54
58
|
pass
|
|
55
59
|
|
|
60
|
+
@property
|
|
56
61
|
@abstractmethod
|
|
57
|
-
def
|
|
58
|
-
"""
|
|
62
|
+
def name(self) -> str:
|
|
63
|
+
"""Filter name for configuration and logging."""
|
|
59
64
|
pass
|
|
60
65
|
|
|
61
66
|
|
|
@@ -113,10 +118,10 @@ class KeywordArgumentFilter(BaseBlockFilter):
|
|
|
113
118
|
return False
|
|
114
119
|
|
|
115
120
|
# Find if any Call node contains the block
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
return any(
|
|
122
|
+
isinstance(node, ast.Call) and self._check_multiline_containment(node, block)
|
|
123
|
+
for node in ast.walk(tree)
|
|
124
|
+
)
|
|
120
125
|
|
|
121
126
|
@staticmethod
|
|
122
127
|
def _check_multiline_containment(node: ast.Call, block: CodeBlock) -> bool:
|
|
@@ -152,8 +157,9 @@ class KeywordArgumentFilter(BaseBlockFilter):
|
|
|
152
157
|
return False
|
|
153
158
|
return True
|
|
154
159
|
|
|
155
|
-
|
|
156
|
-
|
|
160
|
+
@property
|
|
161
|
+
def name(self) -> str:
|
|
162
|
+
"""Filter name."""
|
|
157
163
|
return "keyword_argument_filter"
|
|
158
164
|
|
|
159
165
|
|
|
@@ -163,6 +169,10 @@ class ImportGroupFilter(BaseBlockFilter):
|
|
|
163
169
|
Import organization often creates similar patterns that aren't meaningful duplication.
|
|
164
170
|
"""
|
|
165
171
|
|
|
172
|
+
def __init__(self) -> None:
|
|
173
|
+
"""Initialize the import group filter."""
|
|
174
|
+
pass # Stateless filter for import blocks
|
|
175
|
+
|
|
166
176
|
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
167
177
|
"""Check if block is only import statements.
|
|
168
178
|
|
|
@@ -184,11 +194,105 @@ class ImportGroupFilter(BaseBlockFilter):
|
|
|
184
194
|
|
|
185
195
|
return True
|
|
186
196
|
|
|
187
|
-
|
|
188
|
-
|
|
197
|
+
@property
|
|
198
|
+
def name(self) -> str:
|
|
199
|
+
"""Filter name."""
|
|
189
200
|
return "import_group_filter"
|
|
190
201
|
|
|
191
202
|
|
|
203
|
+
class LoggerCallFilter(BaseBlockFilter):
|
|
204
|
+
"""Filters single-line logger calls that are idiomatic but appear similar.
|
|
205
|
+
|
|
206
|
+
Detects patterns like:
|
|
207
|
+
logger.debug(f"Command: {cmd}")
|
|
208
|
+
logger.info("Starting process...")
|
|
209
|
+
logging.warning("...")
|
|
210
|
+
|
|
211
|
+
These are contextually different despite structural similarity.
|
|
212
|
+
"""
|
|
213
|
+
|
|
214
|
+
def __init__(self) -> None:
|
|
215
|
+
"""Initialize the logger call filter."""
|
|
216
|
+
# Pattern matches: logger.level(...) or logging.level(...)
|
|
217
|
+
self._logger_pattern = re.compile(
|
|
218
|
+
r"^\s*(self\.)?(logger|logging|log)\."
|
|
219
|
+
r"(debug|info|warning|error|critical|exception|log)\s*\("
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
223
|
+
"""Check if block is primarily single-line logger calls.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
block: Code block to evaluate
|
|
227
|
+
file_content: Full file content
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
True if block should be filtered
|
|
231
|
+
"""
|
|
232
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
233
|
+
non_empty = [s for line in lines if (s := line.strip())]
|
|
234
|
+
|
|
235
|
+
if not non_empty:
|
|
236
|
+
return False
|
|
237
|
+
|
|
238
|
+
# Filter if it's a single line that's a logger call
|
|
239
|
+
if len(non_empty) == 1:
|
|
240
|
+
return bool(self._logger_pattern.match(non_empty[0]))
|
|
241
|
+
|
|
242
|
+
return False
|
|
243
|
+
|
|
244
|
+
@property
|
|
245
|
+
def name(self) -> str:
|
|
246
|
+
"""Filter name."""
|
|
247
|
+
return "logger_call_filter"
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class ExceptionReraiseFilter(BaseBlockFilter):
|
|
251
|
+
"""Filters idiomatic exception re-raising patterns.
|
|
252
|
+
|
|
253
|
+
Detects patterns like:
|
|
254
|
+
except SomeError as e:
|
|
255
|
+
raise NewError(...) from e
|
|
256
|
+
|
|
257
|
+
These are Python best practices for exception chaining.
|
|
258
|
+
"""
|
|
259
|
+
|
|
260
|
+
def __init__(self) -> None:
|
|
261
|
+
"""Initialize the exception reraise filter."""
|
|
262
|
+
pass # Stateless filter
|
|
263
|
+
|
|
264
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
265
|
+
"""Check if block is an exception re-raise pattern.
|
|
266
|
+
|
|
267
|
+
Args:
|
|
268
|
+
block: Code block to evaluate
|
|
269
|
+
file_content: Full file content
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
True if block should be filtered
|
|
273
|
+
"""
|
|
274
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
275
|
+
stripped_lines = [s for line in lines if (s := line.strip())]
|
|
276
|
+
|
|
277
|
+
if len(stripped_lines) != 2:
|
|
278
|
+
return False
|
|
279
|
+
|
|
280
|
+
return self._is_except_raise_pattern(stripped_lines)
|
|
281
|
+
|
|
282
|
+
@staticmethod
|
|
283
|
+
def _is_except_raise_pattern(lines: list[str]) -> bool:
|
|
284
|
+
"""Check if lines form an except/raise pattern."""
|
|
285
|
+
first, second = lines[0], lines[1]
|
|
286
|
+
is_except = first.startswith("except ") and first.endswith(":")
|
|
287
|
+
is_raise = second.startswith("raise ") and " from " in second
|
|
288
|
+
return is_except and is_raise
|
|
289
|
+
|
|
290
|
+
@property
|
|
291
|
+
def name(self) -> str:
|
|
292
|
+
"""Filter name."""
|
|
293
|
+
return "exception_reraise_filter"
|
|
294
|
+
|
|
295
|
+
|
|
192
296
|
class BlockFilterRegistry:
|
|
193
297
|
"""Registry for managing duplicate block filters."""
|
|
194
298
|
|
|
@@ -204,7 +308,7 @@ class BlockFilterRegistry:
|
|
|
204
308
|
filter_instance: Filter to register
|
|
205
309
|
"""
|
|
206
310
|
self._filters.append(filter_instance)
|
|
207
|
-
self._enabled_filters.add(filter_instance.
|
|
311
|
+
self._enabled_filters.add(filter_instance.name)
|
|
208
312
|
|
|
209
313
|
def enable_filter(self, filter_name: str) -> None:
|
|
210
314
|
"""Enable a specific filter by name.
|
|
@@ -232,14 +336,8 @@ class BlockFilterRegistry:
|
|
|
232
336
|
Returns:
|
|
233
337
|
True if block should be filtered out
|
|
234
338
|
"""
|
|
235
|
-
for
|
|
236
|
-
|
|
237
|
-
continue
|
|
238
|
-
|
|
239
|
-
if filter_instance.should_filter(block, file_content):
|
|
240
|
-
return True
|
|
241
|
-
|
|
242
|
-
return False
|
|
339
|
+
enabled_filters = (f for f in self._filters if f.name in self._enabled_filters)
|
|
340
|
+
return any(f.should_filter(block, file_content) for f in enabled_filters)
|
|
243
341
|
|
|
244
342
|
def get_enabled_filters(self) -> list[str]:
|
|
245
343
|
"""Get list of enabled filter names.
|
|
@@ -261,5 +359,7 @@ def create_default_registry() -> BlockFilterRegistry:
|
|
|
261
359
|
# Register built-in filters
|
|
262
360
|
registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
|
|
263
361
|
registry.register(ImportGroupFilter())
|
|
362
|
+
registry.register(LoggerCallFilter())
|
|
363
|
+
registry.register(ExceptionReraiseFilter())
|
|
264
364
|
|
|
265
365
|
return registry
|
src/linters/dry/block_grouper.py
CHANGED
|
@@ -26,6 +26,10 @@ from .cache import CodeBlock
|
|
|
26
26
|
class BlockGrouper:
|
|
27
27
|
"""Groups blocks and violations by file path."""
|
|
28
28
|
|
|
29
|
+
def __init__(self) -> None:
|
|
30
|
+
"""Initialize the block grouper."""
|
|
31
|
+
pass # Stateless grouper for code blocks
|
|
32
|
+
|
|
29
33
|
def group_blocks_by_file(self, blocks: list[CodeBlock]) -> dict[Path, list[CodeBlock]]:
|
|
30
34
|
"""Group blocks by file path.
|
|
31
35
|
|
src/linters/dry/cache.py
CHANGED
|
@@ -1,32 +1,45 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Purpose: SQLite storage manager for DRY linter duplicate detection
|
|
3
3
|
|
|
4
|
-
Scope: Code block storage and duplicate detection queries
|
|
4
|
+
Scope: Code block storage, constant storage, and duplicate detection queries
|
|
5
5
|
|
|
6
|
-
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection
|
|
7
|
-
Stores code blocks with hash values
|
|
6
|
+
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection
|
|
7
|
+
and duplicate constants detection. Stores code blocks with hash values and constants with
|
|
8
|
+
name/value pairs, enabling cross-file duplicate detection during a single linter run.
|
|
8
9
|
Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
|
|
9
10
|
No persistence between runs - storage is cleared when linter completes. Includes indexes for
|
|
10
|
-
fast hash lookups enabling cross-file
|
|
11
|
+
fast hash lookups and constant name lookups enabling efficient cross-file detection.
|
|
11
12
|
|
|
12
13
|
Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
|
|
13
14
|
|
|
14
15
|
Exports: CodeBlock dataclass, DRYCache class
|
|
15
16
|
|
|
16
17
|
Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
|
|
17
|
-
find_duplicates_by_hash(hash_value),
|
|
18
|
+
find_duplicates_by_hash(hash_value), duplicate_hashes, add_constants(file_path, constants),
|
|
19
|
+
all_constants, get_duplicate_constant_names(), close()
|
|
18
20
|
|
|
19
|
-
Implementation: SQLite with
|
|
21
|
+
Implementation: SQLite with three tables (files, code_blocks, constants), indexed for performance,
|
|
20
22
|
storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
|
|
23
|
+
|
|
24
|
+
Suppressions:
|
|
25
|
+
- consider-using-with: Tempfile managed by class lifecycle, not context manager
|
|
21
26
|
"""
|
|
22
27
|
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
23
30
|
import sqlite3
|
|
24
31
|
import tempfile
|
|
25
32
|
from dataclasses import dataclass
|
|
26
33
|
from pathlib import Path
|
|
34
|
+
from typing import TYPE_CHECKING
|
|
35
|
+
|
|
36
|
+
from src.core.constants import StorageMode
|
|
27
37
|
|
|
28
38
|
from .cache_query import CacheQueryService
|
|
29
39
|
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from .constant import ConstantInfo
|
|
42
|
+
|
|
30
43
|
|
|
31
44
|
@dataclass
|
|
32
45
|
class CodeBlock:
|
|
@@ -54,9 +67,9 @@ class DRYCache:
|
|
|
54
67
|
self._tempfile = None
|
|
55
68
|
|
|
56
69
|
# Create SQLite connection based on storage mode
|
|
57
|
-
if storage_mode ==
|
|
70
|
+
if storage_mode == StorageMode.MEMORY:
|
|
58
71
|
self.db = sqlite3.connect(":memory:")
|
|
59
|
-
elif storage_mode ==
|
|
72
|
+
elif storage_mode == StorageMode.TEMPFILE:
|
|
60
73
|
# Create temporary file that auto-deletes on close
|
|
61
74
|
# pylint: disable=consider-using-with
|
|
62
75
|
# Justification: tempfile must remain open for SQLite connection lifetime.
|
|
@@ -93,6 +106,19 @@ class DRYCache:
|
|
|
93
106
|
self.db.execute("CREATE INDEX IF NOT EXISTS idx_hash_value ON code_blocks(hash_value)")
|
|
94
107
|
self.db.execute("CREATE INDEX IF NOT EXISTS idx_file_path ON code_blocks(file_path)")
|
|
95
108
|
|
|
109
|
+
# Constants table for duplicate constant detection
|
|
110
|
+
self.db.execute(
|
|
111
|
+
"""CREATE TABLE IF NOT EXISTS constants (
|
|
112
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
113
|
+
file_path TEXT NOT NULL,
|
|
114
|
+
name TEXT NOT NULL,
|
|
115
|
+
line_number INTEGER NOT NULL,
|
|
116
|
+
value TEXT,
|
|
117
|
+
FOREIGN KEY (file_path) REFERENCES files(file_path) ON DELETE CASCADE
|
|
118
|
+
)"""
|
|
119
|
+
)
|
|
120
|
+
self.db.execute("CREATE INDEX IF NOT EXISTS idx_constant_name ON constants(name)")
|
|
121
|
+
|
|
96
122
|
self.db.commit()
|
|
97
123
|
|
|
98
124
|
def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
@@ -157,14 +183,82 @@ class DRYCache:
|
|
|
157
183
|
|
|
158
184
|
return blocks
|
|
159
185
|
|
|
160
|
-
|
|
161
|
-
|
|
186
|
+
@property
|
|
187
|
+
def duplicate_hashes(self) -> list[int]:
|
|
188
|
+
"""Hash values that appear 2+ times.
|
|
162
189
|
|
|
163
190
|
Returns:
|
|
164
191
|
List of hash values with 2 or more occurrences
|
|
165
192
|
"""
|
|
166
193
|
return self._query_service.get_duplicate_hashes(self.db)
|
|
167
194
|
|
|
195
|
+
def add_constants(
|
|
196
|
+
self,
|
|
197
|
+
file_path: Path,
|
|
198
|
+
constants: list[ConstantInfo],
|
|
199
|
+
) -> None:
|
|
200
|
+
"""Add constants to storage.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
file_path: Path to source file
|
|
204
|
+
constants: List of ConstantInfo instances to store
|
|
205
|
+
"""
|
|
206
|
+
if not constants:
|
|
207
|
+
return
|
|
208
|
+
|
|
209
|
+
for const in constants:
|
|
210
|
+
self.db.execute(
|
|
211
|
+
"""INSERT INTO constants
|
|
212
|
+
(file_path, name, line_number, value)
|
|
213
|
+
VALUES (?, ?, ?, ?)""",
|
|
214
|
+
(
|
|
215
|
+
str(file_path),
|
|
216
|
+
const.name,
|
|
217
|
+
const.line_number,
|
|
218
|
+
const.value,
|
|
219
|
+
),
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
self.db.commit()
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def all_constants(self) -> list[tuple[str, str, int, str | None]]:
|
|
226
|
+
"""All constants from storage.
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
List of tuples: (file_path, name, line_number, value)
|
|
230
|
+
"""
|
|
231
|
+
cursor = self.db.execute("SELECT file_path, name, line_number, value FROM constants")
|
|
232
|
+
return cursor.fetchall()
|
|
233
|
+
|
|
234
|
+
def get_duplicate_constant_names(self) -> list[str]:
|
|
235
|
+
"""Get constant names that appear in 2+ files.
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
List of constant names appearing in multiple files
|
|
239
|
+
"""
|
|
240
|
+
cursor = self.db.execute(
|
|
241
|
+
"""SELECT name FROM constants
|
|
242
|
+
GROUP BY name
|
|
243
|
+
HAVING COUNT(DISTINCT file_path) >= 2"""
|
|
244
|
+
)
|
|
245
|
+
return [row[0] for row in cursor.fetchall()]
|
|
246
|
+
|
|
247
|
+
def get_constants_by_name(self, name: str) -> list[tuple[str, int, str | None]]:
|
|
248
|
+
"""Get all locations of a constant by name.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
name: The constant name to search for
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
List of tuples: (file_path, line_number, value)
|
|
255
|
+
"""
|
|
256
|
+
cursor = self.db.execute(
|
|
257
|
+
"SELECT file_path, line_number, value FROM constants WHERE name = ?",
|
|
258
|
+
(name,),
|
|
259
|
+
)
|
|
260
|
+
return cursor.fetchall()
|
|
261
|
+
|
|
168
262
|
def close(self) -> None:
|
|
169
263
|
"""Close database connection and cleanup tempfile if used."""
|
|
170
264
|
self.db.close()
|
src/linters/dry/cache_query.py
CHANGED
|
@@ -22,6 +22,10 @@ import sqlite3
|
|
|
22
22
|
class CacheQueryService:
|
|
23
23
|
"""Handles cache database queries."""
|
|
24
24
|
|
|
25
|
+
def __init__(self) -> None:
|
|
26
|
+
"""Initialize the cache query service."""
|
|
27
|
+
pass # Stateless query service for database operations
|
|
28
|
+
|
|
25
29
|
def get_duplicate_hashes(self, db: sqlite3.Connection) -> list[int]:
|
|
26
30
|
"""Get all hash values that appear 2+ times.
|
|
27
31
|
|
src/linters/dry/config.py
CHANGED
|
@@ -15,14 +15,20 @@ Exports: DRYConfig dataclass
|
|
|
15
15
|
Interfaces: DRYConfig.__init__, DRYConfig.from_dict(config: dict) -> DRYConfig
|
|
16
16
|
|
|
17
17
|
Implementation: Dataclass with field defaults, __post_init__ validation, and dict-based construction
|
|
18
|
+
|
|
19
|
+
Suppressions:
|
|
20
|
+
- too-many-instance-attributes: Configuration dataclass with related settings
|
|
18
21
|
"""
|
|
19
22
|
|
|
20
23
|
from dataclasses import dataclass, field
|
|
21
24
|
from typing import Any
|
|
22
25
|
|
|
26
|
+
from src.core.constants import StorageMode
|
|
27
|
+
|
|
23
28
|
# Default configuration constants
|
|
24
29
|
DEFAULT_MIN_DUPLICATE_LINES = 3
|
|
25
30
|
DEFAULT_MIN_DUPLICATE_TOKENS = 30
|
|
31
|
+
DEFAULT_DETECT_DUPLICATE_CONSTANTS = True
|
|
26
32
|
|
|
27
33
|
|
|
28
34
|
@dataclass
|
|
@@ -60,23 +66,35 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
60
66
|
}
|
|
61
67
|
)
|
|
62
68
|
|
|
69
|
+
# Duplicate constants detection
|
|
70
|
+
detect_duplicate_constants: bool = DEFAULT_DETECT_DUPLICATE_CONSTANTS
|
|
71
|
+
min_constant_occurrences: int = 2 # Minimum files with same constant to report
|
|
72
|
+
|
|
73
|
+
# Language-specific overrides for constant detection
|
|
74
|
+
python_min_constant_occurrences: int | None = None
|
|
75
|
+
typescript_min_constant_occurrences: int | None = None
|
|
76
|
+
|
|
63
77
|
def __post_init__(self) -> None:
|
|
64
78
|
"""Validate configuration values."""
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
)
|
|
69
|
-
if self.min_duplicate_tokens <= 0:
|
|
70
|
-
raise ValueError(
|
|
71
|
-
f"min_duplicate_tokens must be positive, got {self.min_duplicate_tokens}"
|
|
72
|
-
)
|
|
73
|
-
if self.min_occurrences <= 0:
|
|
74
|
-
raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
|
|
75
|
-
if self.storage_mode not in ("memory", "tempfile"):
|
|
79
|
+
self._validate_positive_fields()
|
|
80
|
+
valid_modes = (StorageMode.MEMORY, StorageMode.TEMPFILE)
|
|
81
|
+
if self.storage_mode not in valid_modes:
|
|
76
82
|
raise ValueError(
|
|
77
83
|
f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
|
|
78
84
|
)
|
|
79
85
|
|
|
86
|
+
def _validate_positive_fields(self) -> None:
    """Validate that every threshold field holds a positive value.

    The global thresholds are always checked. The per-language
    min_constant_occurrences overrides are checked only when they are set
    (not None), because a set override is returned in place of the global
    value and must meet the same constraint.

    Raises:
        ValueError: If a checked field is zero or negative; the message names
            the first offending field and its value
    """
    required_fields = [
        ("min_duplicate_lines", self.min_duplicate_lines),
        ("min_duplicate_tokens", self.min_duplicate_tokens),
        ("min_occurrences", self.min_occurrences),
        ("min_constant_occurrences", self.min_constant_occurrences),
    ]
    optional_overrides = [
        ("python_min_constant_occurrences", self.python_min_constant_occurrences),
        ("typescript_min_constant_occurrences", self.typescript_min_constant_occurrences),
    ]
    # Required fields come first so existing failures report the same field as before.
    positive_fields = required_fields + [
        (name, value) for name, value in optional_overrides if value is not None
    ]
    for name, value in positive_fields:
        if value <= 0:
            raise ValueError(f"{name} must be positive, got {value}")
|
|
97
|
+
|
|
80
98
|
def get_min_occurrences_for_language(self, language: str) -> int:
|
|
81
99
|
"""Get minimum occurrences threshold for a specific language.
|
|
82
100
|
|
|
@@ -97,6 +115,25 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
97
115
|
override = language_overrides.get(language_lower)
|
|
98
116
|
return override if override is not None else self.min_occurrences
|
|
99
117
|
|
|
118
|
+
def get_min_constant_occurrences_for_language(self, language: str) -> int:
    """Resolve the constant-occurrence threshold that applies to a language.

    Args:
        language: Language identifier (e.g., "python", "typescript"); it is
            lower-cased before being matched

    Returns:
        The language's own override when one is set, otherwise the global
        min_constant_occurrences value
    """
    normalized = language.lower()

    if normalized == "python":
        candidate = self.python_min_constant_occurrences
    elif normalized == "typescript":
        candidate = self.typescript_min_constant_occurrences
    else:
        # Languages without a dedicated override use the global threshold.
        candidate = None

    if candidate is None:
        return self.min_constant_occurrences
    return candidate
|
|
136
|
+
|
|
100
137
|
@classmethod
|
|
101
138
|
def from_dict(cls, config: dict[str, Any]) -> "DRYConfig":
|
|
102
139
|
"""Load configuration from dictionary.
|
|
@@ -131,4 +168,10 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
131
168
|
storage_mode=config.get("storage_mode", "memory"),
|
|
132
169
|
ignore_patterns=config.get("ignore", []),
|
|
133
170
|
filters=filters,
|
|
171
|
+
detect_duplicate_constants=config.get(
|
|
172
|
+
"detect_duplicate_constants", DEFAULT_DETECT_DUPLICATE_CONSTANTS
|
|
173
|
+
),
|
|
174
|
+
min_constant_occurrences=config.get("min_constant_occurrences", 2),
|
|
175
|
+
python_min_constant_occurrences=python_config.get("min_constant_occurrences"),
|
|
176
|
+
typescript_min_constant_occurrences=typescript_config.get("min_constant_occurrences"),
|
|
134
177
|
)
|