thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +44 -27
- src/core/base.py +95 -5
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +36 -6
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +125 -22
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +142 -94
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +68 -21
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +20 -82
- src/linters/dry/file_analyzer.py +15 -50
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +182 -54
- src/linters/dry/python_analyzer.py +108 -336
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/storage_initializer.py +9 -18
- src/linters/dry/token_hasher.py +129 -71
- src/linters/dry/typescript_analyzer.py +68 -380
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +9 -5
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/__init__.py +24 -0
- src/linters/file_header/atemporal_detector.py +105 -0
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +140 -0
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +72 -0
- src/linters/file_header/linter.py +309 -0
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +42 -0
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +79 -0
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +74 -31
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +82 -0
- src/linters/magic_numbers/context_analyzer.py +249 -0
- src/linters/magic_numbers/linter.py +462 -0
- src/linters/magic_numbers/python_analyzer.py +64 -0
- src/linters/magic_numbers/typescript_analyzer.py +215 -0
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +6 -3
- src/linters/nesting/linter.py +31 -34
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -11
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/__init__.py +53 -0
- src/linters/print_statements/config.py +78 -0
- src/linters/print_statements/linter.py +413 -0
- src/linters/print_statements/python_analyzer.py +153 -0
- src/linters/print_statements/typescript_analyzer.py +125 -0
- src/linters/print_statements/violation_builder.py +96 -0
- src/linters/srp/__init__.py +3 -3
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/config.py +12 -6
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +47 -39
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +264 -16
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +354 -0
- src/utils/project_root.py +138 -16
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1055
- thailint-0.2.0.dist-info/METADATA +0 -980
- thailint-0.2.0.dist-info/RECORD +0 -75
- thailint-0.2.0.dist-info/entry_points.txt +0 -4
- {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
|
@@ -8,7 +8,7 @@ Overview: Analyzes Python source files to extract code blocks for duplicate dete
|
|
|
8
8
|
Filters out docstrings at the tokenization level to prevent false positive duplication
|
|
9
9
|
detection on documentation strings.
|
|
10
10
|
|
|
11
|
-
Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, ast,
|
|
11
|
+
Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, ast, token_hasher module
|
|
12
12
|
|
|
13
13
|
Exports: PythonDuplicateAnalyzer class
|
|
14
14
|
|
|
@@ -17,6 +17,12 @@ Interfaces: PythonDuplicateAnalyzer.analyze(file_path: Path, content: str, confi
|
|
|
17
17
|
|
|
18
18
|
Implementation: Uses custom tokenizer that filters docstrings before hashing
|
|
19
19
|
|
|
20
|
+
Suppressions:
|
|
21
|
+
- too-many-arguments,too-many-positional-arguments: Line processing with related params
|
|
22
|
+
- type:ignore[arg-type]: ast.get_docstring returns str|None, typing limitation
|
|
23
|
+
- srp.violation: Complex AST analysis algorithm for duplicate detection. See SRP Exception below.
|
|
24
|
+
- nesting.excessive-depth: analyze method uses nested loops for docstring extraction.
|
|
25
|
+
|
|
20
26
|
SRP Exception: PythonDuplicateAnalyzer has 32 methods and 358 lines (exceeds max 8 methods/200 lines)
|
|
21
27
|
Justification: Complex AST analysis algorithm for duplicate code detection with sophisticated
|
|
22
28
|
false positive filtering. Methods form tightly coupled algorithm pipeline: docstring extraction,
|
|
@@ -29,18 +35,14 @@ SRP Exception: PythonDuplicateAnalyzer has 32 methods and 358 lines (exceeds max
|
|
|
29
35
|
"""
|
|
30
36
|
|
|
31
37
|
import ast
|
|
32
|
-
from collections.abc import Callable
|
|
33
38
|
from pathlib import Path
|
|
34
|
-
from typing import cast
|
|
35
39
|
|
|
40
|
+
from . import token_hasher
|
|
36
41
|
from .base_token_analyzer import BaseTokenAnalyzer
|
|
37
42
|
from .block_filter import BlockFilterRegistry, create_default_registry
|
|
38
43
|
from .cache import CodeBlock
|
|
39
44
|
from .config import DRYConfig
|
|
40
|
-
|
|
41
|
-
# Type alias for AST nodes that have line number attributes
|
|
42
|
-
# All stmt and expr nodes have lineno and end_lineno after parsing
|
|
43
|
-
ASTWithLineNumbers = ast.stmt | ast.expr
|
|
45
|
+
from .single_statement_detector import SingleStatementDetector
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violation]
|
|
@@ -58,8 +60,12 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
58
60
|
"""
|
|
59
61
|
super().__init__()
|
|
60
62
|
self._filter_registry = filter_registry or create_default_registry()
|
|
63
|
+
# Single-statement detector is created per-analysis with cached AST data
|
|
64
|
+
self._statement_detector: SingleStatementDetector | None = None
|
|
61
65
|
|
|
62
|
-
def analyze(
|
|
66
|
+
def analyze( # thailint: ignore[nesting.excessive-depth]
|
|
67
|
+
self, file_path: Path, content: str, config: DRYConfig
|
|
68
|
+
) -> list[CodeBlock]:
|
|
63
69
|
"""Analyze Python file for duplicate code blocks, excluding docstrings.
|
|
64
70
|
|
|
65
71
|
Args:
|
|
@@ -70,37 +76,72 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
70
76
|
Returns:
|
|
71
77
|
List of CodeBlock instances with hash values
|
|
72
78
|
"""
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
block = CodeBlock(
|
|
90
|
-
file_path=file_path,
|
|
91
|
-
start_line=start_line,
|
|
92
|
-
end_line=end_line,
|
|
93
|
-
snippet=snippet,
|
|
94
|
-
hash_value=hash_val,
|
|
79
|
+
# Performance optimization: Parse AST once and create detector with cached data
|
|
80
|
+
cached_ast = self._parse_content_safe(content)
|
|
81
|
+
line_to_nodes = SingleStatementDetector.build_line_to_node_index(cached_ast)
|
|
82
|
+
self._statement_detector = SingleStatementDetector(cached_ast, content, line_to_nodes)
|
|
83
|
+
|
|
84
|
+
try:
|
|
85
|
+
# Get docstring line ranges
|
|
86
|
+
docstring_ranges = self._get_docstring_ranges_from_content(content)
|
|
87
|
+
|
|
88
|
+
# Tokenize with line number tracking
|
|
89
|
+
lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
|
|
90
|
+
|
|
91
|
+
# Generate rolling hash windows
|
|
92
|
+
windows = self._rolling_hash_with_tracking(
|
|
93
|
+
lines_with_numbers, config.min_duplicate_lines
|
|
95
94
|
)
|
|
96
95
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
96
|
+
return self._filter_valid_blocks(windows, file_path, content)
|
|
97
|
+
finally:
|
|
98
|
+
# Clear detector after analysis to avoid memory leaks
|
|
99
|
+
self._statement_detector = None
|
|
100
|
+
|
|
101
|
+
def _filter_valid_blocks(
|
|
102
|
+
self,
|
|
103
|
+
windows: list[tuple[int, int, int, str]],
|
|
104
|
+
file_path: Path,
|
|
105
|
+
content: str,
|
|
106
|
+
) -> list[CodeBlock]:
|
|
107
|
+
"""Filter hash windows and create valid CodeBlock instances."""
|
|
108
|
+
return [
|
|
109
|
+
block
|
|
110
|
+
for hash_val, start_line, end_line, snippet in windows
|
|
111
|
+
if (
|
|
112
|
+
block := self._create_block_if_valid(
|
|
113
|
+
file_path, content, hash_val, start_line, end_line, snippet
|
|
114
|
+
)
|
|
115
|
+
)
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
def _create_block_if_valid( # pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
119
|
+
self,
|
|
120
|
+
file_path: Path,
|
|
121
|
+
content: str,
|
|
122
|
+
hash_val: int,
|
|
123
|
+
start_line: int,
|
|
124
|
+
end_line: int,
|
|
125
|
+
snippet: str,
|
|
126
|
+
) -> CodeBlock | None:
|
|
127
|
+
"""Create CodeBlock if it passes all validation checks."""
|
|
128
|
+
if self._statement_detector and self._statement_detector.is_single_statement(
|
|
129
|
+
content, start_line, end_line
|
|
130
|
+
):
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
block = CodeBlock(
|
|
134
|
+
file_path=file_path,
|
|
135
|
+
start_line=start_line,
|
|
136
|
+
end_line=end_line,
|
|
137
|
+
snippet=snippet,
|
|
138
|
+
hash_value=hash_val,
|
|
139
|
+
)
|
|
100
140
|
|
|
101
|
-
|
|
141
|
+
if self._filter_registry.should_filter_block(block, content):
|
|
142
|
+
return None
|
|
102
143
|
|
|
103
|
-
return
|
|
144
|
+
return block
|
|
104
145
|
|
|
105
146
|
def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
|
|
106
147
|
"""Extract line numbers that are part of docstrings.
|
|
@@ -168,25 +209,44 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
168
209
|
List of (original_line_number, normalized_code) tuples
|
|
169
210
|
"""
|
|
170
211
|
lines_with_numbers = []
|
|
212
|
+
in_multiline_import = False
|
|
171
213
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
214
|
+
non_docstring_lines = (
|
|
215
|
+
(line_num, line)
|
|
216
|
+
for line_num, line in enumerate(content.split("\n"), start=1)
|
|
217
|
+
if line_num not in docstring_lines
|
|
218
|
+
)
|
|
219
|
+
for line_num, line in non_docstring_lines:
|
|
220
|
+
in_multiline_import, normalized = self._normalize_and_filter_line(
|
|
221
|
+
line, in_multiline_import
|
|
222
|
+
)
|
|
223
|
+
if normalized is not None:
|
|
224
|
+
lines_with_numbers.append((line_num, normalized))
|
|
176
225
|
|
|
177
|
-
|
|
178
|
-
line = self._hasher._strip_comments(line) # pylint: disable=protected-access
|
|
179
|
-
line = " ".join(line.split())
|
|
226
|
+
return lines_with_numbers
|
|
180
227
|
|
|
181
|
-
|
|
182
|
-
|
|
228
|
+
def _normalize_and_filter_line(
|
|
229
|
+
self, line: str, in_multiline_import: bool
|
|
230
|
+
) -> tuple[bool, str | None]:
|
|
231
|
+
"""Normalize line and check if it should be included.
|
|
183
232
|
|
|
184
|
-
|
|
185
|
-
|
|
233
|
+
Args:
|
|
234
|
+
line: Raw source line
|
|
235
|
+
in_multiline_import: Current multi-line import state
|
|
186
236
|
|
|
187
|
-
|
|
237
|
+
Returns:
|
|
238
|
+
Tuple of (new_import_state, normalized_line or None if should skip)
|
|
239
|
+
"""
|
|
240
|
+
normalized = token_hasher.normalize_line(line)
|
|
241
|
+
if not normalized:
|
|
242
|
+
return in_multiline_import, None
|
|
188
243
|
|
|
189
|
-
|
|
244
|
+
new_state, should_skip = token_hasher.should_skip_import_line(
|
|
245
|
+
normalized, in_multiline_import
|
|
246
|
+
)
|
|
247
|
+
if should_skip:
|
|
248
|
+
return new_state, None
|
|
249
|
+
return new_state, normalized
|
|
190
250
|
|
|
191
251
|
def _rolling_hash_with_tracking(
|
|
192
252
|
self, lines_with_numbers: list[tuple[int, str]], window_size: int
|
|
@@ -220,14 +280,6 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
220
280
|
|
|
221
281
|
return hashes
|
|
222
282
|
|
|
223
|
-
def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
|
|
224
|
-
"""Check if a line range in the original source is a single logical statement."""
|
|
225
|
-
tree = self._parse_content_safe(content)
|
|
226
|
-
if tree is None:
|
|
227
|
-
return False
|
|
228
|
-
|
|
229
|
-
return self._check_overlapping_nodes(tree, start_line, end_line)
|
|
230
|
-
|
|
231
283
|
@staticmethod
|
|
232
284
|
def _parse_content_safe(content: str) -> ast.Module | None:
|
|
233
285
|
"""Parse content, returning None on syntax error."""
|
|
@@ -235,283 +287,3 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
|
|
|
235
287
|
return ast.parse(content)
|
|
236
288
|
except SyntaxError:
|
|
237
289
|
return None
|
|
238
|
-
|
|
239
|
-
def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
|
|
240
|
-
"""Check if any AST node overlaps and matches single-statement pattern."""
|
|
241
|
-
for node in ast.walk(tree):
|
|
242
|
-
if self._node_overlaps_and_matches(node, start_line, end_line):
|
|
243
|
-
return True
|
|
244
|
-
return False
|
|
245
|
-
|
|
246
|
-
def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
|
|
247
|
-
"""Check if node overlaps with range and matches single-statement pattern."""
|
|
248
|
-
if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
|
|
249
|
-
return False
|
|
250
|
-
|
|
251
|
-
overlaps = not (node.end_lineno < start_line or node.lineno > end_line)
|
|
252
|
-
if not overlaps:
|
|
253
|
-
return False
|
|
254
|
-
|
|
255
|
-
return self._is_single_statement_pattern(node, start_line, end_line)
|
|
256
|
-
|
|
257
|
-
def _is_single_statement_pattern(self, node: ast.AST, start_line: int, end_line: int) -> bool:
|
|
258
|
-
"""Check if an AST node represents a single-statement pattern to filter.
|
|
259
|
-
|
|
260
|
-
Args:
|
|
261
|
-
node: AST node that overlaps with the line range
|
|
262
|
-
start_line: Starting line number (1-indexed)
|
|
263
|
-
end_line: Ending line number (1-indexed)
|
|
264
|
-
|
|
265
|
-
Returns:
|
|
266
|
-
True if this node represents a single logical statement pattern
|
|
267
|
-
"""
|
|
268
|
-
contains = self._node_contains_range(node, start_line, end_line)
|
|
269
|
-
if contains is None:
|
|
270
|
-
return False
|
|
271
|
-
|
|
272
|
-
return self._dispatch_pattern_check(node, start_line, end_line, contains)
|
|
273
|
-
|
|
274
|
-
def _node_contains_range(self, node: ast.AST, start_line: int, end_line: int) -> bool | None:
|
|
275
|
-
"""Check if node completely contains the range. Returns None if invalid."""
|
|
276
|
-
if not self._has_valid_line_numbers(node):
|
|
277
|
-
return None
|
|
278
|
-
# Type narrowing: _has_valid_line_numbers ensures node has line numbers
|
|
279
|
-
# Safe to cast after validation check above
|
|
280
|
-
typed_node = cast(ASTWithLineNumbers, node)
|
|
281
|
-
# Use type: ignore to suppress MyPy's inability to understand runtime validation
|
|
282
|
-
return typed_node.lineno <= start_line and typed_node.end_lineno >= end_line # type: ignore[operator]
|
|
283
|
-
|
|
284
|
-
@staticmethod
|
|
285
|
-
def _has_valid_line_numbers(node: ast.AST) -> bool:
|
|
286
|
-
"""Check if node has valid line number attributes."""
|
|
287
|
-
if not (hasattr(node, "lineno") and hasattr(node, "end_lineno")):
|
|
288
|
-
return False
|
|
289
|
-
return node.lineno is not None and node.end_lineno is not None
|
|
290
|
-
|
|
291
|
-
def _dispatch_pattern_check(
|
|
292
|
-
self, node: ast.AST, start_line: int, end_line: int, contains: bool
|
|
293
|
-
) -> bool:
|
|
294
|
-
"""Dispatch to node-type-specific pattern checkers."""
|
|
295
|
-
# Simple containment check for Expr nodes
|
|
296
|
-
if isinstance(node, ast.Expr):
|
|
297
|
-
return contains
|
|
298
|
-
|
|
299
|
-
# Delegate to specialized checkers
|
|
300
|
-
return self._check_specific_pattern(node, start_line, end_line, contains)
|
|
301
|
-
|
|
302
|
-
def _check_specific_pattern(
|
|
303
|
-
self, node: ast.AST, start_line: int, end_line: int, contains: bool
|
|
304
|
-
) -> bool:
|
|
305
|
-
"""Check specific node types with their pattern rules."""
|
|
306
|
-
if isinstance(node, ast.ClassDef):
|
|
307
|
-
return self._check_class_def_pattern(node, start_line, end_line)
|
|
308
|
-
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
309
|
-
return self._check_function_def_pattern(node, start_line, end_line)
|
|
310
|
-
if isinstance(node, ast.Call):
|
|
311
|
-
return self._check_call_pattern(node, start_line, end_line, contains)
|
|
312
|
-
if isinstance(node, ast.Assign):
|
|
313
|
-
return self._check_assign_pattern(node, start_line, end_line, contains)
|
|
314
|
-
return False
|
|
315
|
-
|
|
316
|
-
def _check_class_def_pattern(self, node: ast.ClassDef, start_line: int, end_line: int) -> bool:
|
|
317
|
-
"""Check if range is in class field definitions (not method bodies)."""
|
|
318
|
-
first_method_line = self._find_first_method_line(node)
|
|
319
|
-
class_start = self._get_class_start_with_decorators(node)
|
|
320
|
-
return self._is_in_class_fields_area(
|
|
321
|
-
class_start, start_line, end_line, first_method_line, node.end_lineno
|
|
322
|
-
)
|
|
323
|
-
|
|
324
|
-
@staticmethod
|
|
325
|
-
def _find_first_method_line(node: ast.ClassDef) -> int | None:
|
|
326
|
-
"""Find line number of first method in class."""
|
|
327
|
-
for item in node.body:
|
|
328
|
-
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
329
|
-
return item.lineno
|
|
330
|
-
return None
|
|
331
|
-
|
|
332
|
-
@staticmethod
|
|
333
|
-
def _get_class_start_with_decorators(node: ast.ClassDef) -> int:
|
|
334
|
-
"""Get class start line, including decorators if present."""
|
|
335
|
-
if node.decorator_list:
|
|
336
|
-
return min(d.lineno for d in node.decorator_list)
|
|
337
|
-
return node.lineno
|
|
338
|
-
|
|
339
|
-
@staticmethod
|
|
340
|
-
def _is_in_class_fields_area(
|
|
341
|
-
class_start: int,
|
|
342
|
-
start_line: int,
|
|
343
|
-
end_line: int,
|
|
344
|
-
first_method_line: int | None,
|
|
345
|
-
class_end_line: int | None,
|
|
346
|
-
) -> bool:
|
|
347
|
-
"""Check if range is in class fields area (before methods)."""
|
|
348
|
-
if first_method_line is not None:
|
|
349
|
-
return class_start <= start_line and end_line < first_method_line
|
|
350
|
-
if class_end_line is not None:
|
|
351
|
-
return class_start <= start_line and class_end_line >= end_line
|
|
352
|
-
return False
|
|
353
|
-
|
|
354
|
-
def _check_function_def_pattern(
|
|
355
|
-
self, node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int
|
|
356
|
-
) -> bool:
|
|
357
|
-
"""Check if range is in function decorator pattern."""
|
|
358
|
-
if not node.decorator_list:
|
|
359
|
-
return False
|
|
360
|
-
|
|
361
|
-
first_decorator_line = min(d.lineno for d in node.decorator_list)
|
|
362
|
-
first_body_line = self._get_function_body_start(node)
|
|
363
|
-
|
|
364
|
-
if first_body_line is None:
|
|
365
|
-
return False
|
|
366
|
-
|
|
367
|
-
return start_line >= first_decorator_line and end_line < first_body_line
|
|
368
|
-
|
|
369
|
-
@staticmethod
|
|
370
|
-
def _get_function_body_start(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int | None:
|
|
371
|
-
"""Get the line number where function body starts."""
|
|
372
|
-
if not node.body or not hasattr(node.body[0], "lineno"):
|
|
373
|
-
return None
|
|
374
|
-
return node.body[0].lineno
|
|
375
|
-
|
|
376
|
-
def _check_call_pattern(
|
|
377
|
-
self, node: ast.Call, start_line: int, end_line: int, contains: bool
|
|
378
|
-
) -> bool:
|
|
379
|
-
"""Check if range is part of a function/constructor call."""
|
|
380
|
-
return self._check_multiline_or_contained(node, start_line, end_line, contains)
|
|
381
|
-
|
|
382
|
-
def _check_assign_pattern(
|
|
383
|
-
self, node: ast.Assign, start_line: int, end_line: int, contains: bool
|
|
384
|
-
) -> bool:
|
|
385
|
-
"""Check if range is part of a multi-line assignment."""
|
|
386
|
-
return self._check_multiline_or_contained(node, start_line, end_line, contains)
|
|
387
|
-
|
|
388
|
-
def _check_multiline_or_contained(
|
|
389
|
-
self, node: ast.AST, start_line: int, end_line: int, contains: bool
|
|
390
|
-
) -> bool:
|
|
391
|
-
"""Check if node is multiline containing start, or single-line containing range."""
|
|
392
|
-
if not self._has_valid_line_numbers(node):
|
|
393
|
-
return False
|
|
394
|
-
|
|
395
|
-
# Type narrowing: _has_valid_line_numbers ensures node has line numbers
|
|
396
|
-
# Safe to cast after validation check above
|
|
397
|
-
typed_node = cast(ASTWithLineNumbers, node)
|
|
398
|
-
# Use type: ignore to suppress MyPy's inability to understand runtime validation
|
|
399
|
-
is_multiline = typed_node.lineno < typed_node.end_lineno # type: ignore[operator]
|
|
400
|
-
if is_multiline:
|
|
401
|
-
return typed_node.lineno <= start_line <= typed_node.end_lineno # type: ignore[operator]
|
|
402
|
-
return contains
|
|
403
|
-
|
|
404
|
-
def _is_standalone_single_statement(
|
|
405
|
-
self, lines: list[str], start_line: int, end_line: int
|
|
406
|
-
) -> bool:
|
|
407
|
-
"""Check if the exact range parses as a single statement on its own."""
|
|
408
|
-
source_lines = lines[start_line - 1 : end_line]
|
|
409
|
-
source_snippet = "\n".join(source_lines)
|
|
410
|
-
|
|
411
|
-
try:
|
|
412
|
-
tree = ast.parse(source_snippet)
|
|
413
|
-
return len(tree.body) == 1
|
|
414
|
-
except SyntaxError:
|
|
415
|
-
return False
|
|
416
|
-
|
|
417
|
-
def _check_ast_context( # pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
418
|
-
self,
|
|
419
|
-
lines: list[str],
|
|
420
|
-
start_line: int,
|
|
421
|
-
end_line: int,
|
|
422
|
-
lookback: int,
|
|
423
|
-
lookforward: int,
|
|
424
|
-
predicate: Callable[[ast.Module, int], bool],
|
|
425
|
-
) -> bool:
|
|
426
|
-
"""Generic helper for AST-based context checking.
|
|
427
|
-
|
|
428
|
-
Args:
|
|
429
|
-
lines: Source file lines
|
|
430
|
-
start_line: Starting line number (1-indexed)
|
|
431
|
-
end_line: Ending line number (1-indexed)
|
|
432
|
-
lookback: Number of lines to look backward
|
|
433
|
-
lookforward: Number of lines to look forward
|
|
434
|
-
predicate: Function that takes AST tree and returns bool
|
|
435
|
-
|
|
436
|
-
Returns:
|
|
437
|
-
True if predicate returns True for the parsed context
|
|
438
|
-
"""
|
|
439
|
-
lookback_start = max(0, start_line - lookback)
|
|
440
|
-
lookforward_end = min(len(lines), end_line + lookforward)
|
|
441
|
-
|
|
442
|
-
context_lines = lines[lookback_start:lookforward_end]
|
|
443
|
-
context = "\n".join(context_lines)
|
|
444
|
-
|
|
445
|
-
try:
|
|
446
|
-
tree = ast.parse(context)
|
|
447
|
-
return predicate(tree, lookback_start)
|
|
448
|
-
except SyntaxError:
|
|
449
|
-
pass
|
|
450
|
-
|
|
451
|
-
return False
|
|
452
|
-
|
|
453
|
-
def _is_part_of_decorator(self, lines: list[str], start_line: int, end_line: int) -> bool:
|
|
454
|
-
"""Check if lines are part of a decorator + function definition.
|
|
455
|
-
|
|
456
|
-
A decorator pattern is @something(...) followed by def/class.
|
|
457
|
-
"""
|
|
458
|
-
|
|
459
|
-
def has_decorators(tree: ast.Module, _lookback_start: int) -> bool:
|
|
460
|
-
"""Check if any function or class in the tree has decorators."""
|
|
461
|
-
for stmt in tree.body:
|
|
462
|
-
if isinstance(stmt, (ast.FunctionDef, ast.ClassDef)) and stmt.decorator_list:
|
|
463
|
-
return True
|
|
464
|
-
return False
|
|
465
|
-
|
|
466
|
-
return self._check_ast_context(lines, start_line, end_line, 10, 10, has_decorators)
|
|
467
|
-
|
|
468
|
-
def _is_part_of_function_call(self, lines: list[str], start_line: int, end_line: int) -> bool:
|
|
469
|
-
"""Check if lines are arguments inside a function/constructor call.
|
|
470
|
-
|
|
471
|
-
Detects patterns like:
|
|
472
|
-
obj = Constructor(
|
|
473
|
-
arg1=value1,
|
|
474
|
-
arg2=value2,
|
|
475
|
-
)
|
|
476
|
-
"""
|
|
477
|
-
|
|
478
|
-
def is_single_non_function_statement(tree: ast.Module, _lookback_start: int) -> bool:
|
|
479
|
-
"""Check if context has exactly one statement that's not a function/class def."""
|
|
480
|
-
return len(tree.body) == 1 and not isinstance(
|
|
481
|
-
tree.body[0], (ast.FunctionDef, ast.ClassDef)
|
|
482
|
-
)
|
|
483
|
-
|
|
484
|
-
return self._check_ast_context(
|
|
485
|
-
lines, start_line, end_line, 10, 10, is_single_non_function_statement
|
|
486
|
-
)
|
|
487
|
-
|
|
488
|
-
def _is_part_of_class_body(self, lines: list[str], start_line: int, end_line: int) -> bool:
|
|
489
|
-
"""Check if lines are field definitions inside a class body.
|
|
490
|
-
|
|
491
|
-
Detects patterns like:
|
|
492
|
-
class Foo:
|
|
493
|
-
field1: Type1
|
|
494
|
-
field2: Type2
|
|
495
|
-
"""
|
|
496
|
-
|
|
497
|
-
def is_within_class_body(tree: ast.Module, lookback_start: int) -> bool:
|
|
498
|
-
"""Check if flagged range falls within a class body."""
|
|
499
|
-
for stmt in tree.body:
|
|
500
|
-
if not isinstance(stmt, ast.ClassDef):
|
|
501
|
-
continue
|
|
502
|
-
|
|
503
|
-
# Adjust line numbers: stmt.lineno is relative to context
|
|
504
|
-
# We need to convert back to original file line numbers
|
|
505
|
-
class_start_in_context = stmt.lineno
|
|
506
|
-
class_end_in_context = stmt.end_lineno if stmt.end_lineno else stmt.lineno
|
|
507
|
-
|
|
508
|
-
# Convert to original file line numbers (1-indexed)
|
|
509
|
-
class_start_original = lookback_start + class_start_in_context
|
|
510
|
-
class_end_original = lookback_start + class_end_in_context
|
|
511
|
-
|
|
512
|
-
# Check if the flagged range overlaps with class body
|
|
513
|
-
if start_line >= class_start_original and end_line <= class_end_original:
|
|
514
|
-
return True
|
|
515
|
-
return False
|
|
516
|
-
|
|
517
|
-
return self._check_ast_context(lines, start_line, end_line, 10, 5, is_within_class_body)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Extract Python module-level constants using AST parsing
|
|
3
|
+
|
|
4
|
+
Scope: Python constant extraction for duplicate constants detection
|
|
5
|
+
|
|
6
|
+
Overview: Extracts module-level constant definitions from Python source code using the AST module.
|
|
7
|
+
Identifies constants as module-level assignments where the target name matches the ALL_CAPS
|
|
8
|
+
naming convention (e.g., API_TIMEOUT = 30). Excludes private constants (leading underscore),
|
|
9
|
+
class-level constants, and function-level constants to focus on public module constants that
|
|
10
|
+
should be consolidated across files.
|
|
11
|
+
|
|
12
|
+
Dependencies: Python ast module, CONSTANT_NAME_PATTERN and ConstantInfo from constant module
|
|
13
|
+
|
|
14
|
+
Exports: extract_python_constants function
|
|
15
|
+
|
|
16
|
+
Interfaces: extract_python_constants(content: str) -> list[ConstantInfo]
|
|
17
|
+
|
|
18
|
+
Implementation: AST-based parsing with module-level filtering and ALL_CAPS regex matching
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import ast
|
|
22
|
+
|
|
23
|
+
from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
|
|
24
|
+
|
|
25
|
+
# Container types with fixed representations
|
|
26
|
+
# Placeholder text that _get_value_string returns for list, dict and tuple
# expressions, keyed by their AST node class.
CONTAINER_REPRESENTATIONS = {ast.List: "[...]", ast.Dict: "{...}", ast.Tuple: "(...)"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def extract_python_constants(content: str) -> list[ConstantInfo]:
    """Extract constants from Python source code.

    Only the top-level statements of the module are inspected; each one is
    handed to ``_extract_from_node``.

    Args:
        content: Python source code as string

    Returns:
        List of ConstantInfo for module-level constants. Empty when the
        source cannot be parsed.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers source containing null bytes, which some Python
        # versions report that way instead of as a SyntaxError. Unparseable
        # input yields no constants rather than aborting the run.
        return []

    constants: list[ConstantInfo] = []
    for node in tree.body:
        constants.extend(_extract_from_node(node))
    return constants
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _extract_from_node(node: ast.stmt) -> list[ConstantInfo]:
|
|
49
|
+
"""Extract constants from a single AST node."""
|
|
50
|
+
if isinstance(node, ast.Assign):
|
|
51
|
+
return _extract_from_assign(node)
|
|
52
|
+
if isinstance(node, ast.AnnAssign):
|
|
53
|
+
return _extract_from_ann_assign(node)
|
|
54
|
+
return []
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _extract_from_assign(node: ast.Assign) -> list[ConstantInfo]:
|
|
58
|
+
"""Extract constants from a simple assignment."""
|
|
59
|
+
return [info for t in node.targets if (info := _to_const_info(t, node.value, node.lineno))]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_from_ann_assign(node: ast.AnnAssign) -> list[ConstantInfo]:
|
|
63
|
+
"""Extract constants from an annotated assignment."""
|
|
64
|
+
if node.value is None:
|
|
65
|
+
return []
|
|
66
|
+
info = _to_const_info(node.target, node.value, node.lineno)
|
|
67
|
+
return [info] if info else []
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _to_const_info(target: ast.expr, value: ast.expr, lineno: int) -> ConstantInfo | None:
|
|
71
|
+
"""Extract constant info from target and value."""
|
|
72
|
+
if not isinstance(target, ast.Name):
|
|
73
|
+
return None
|
|
74
|
+
name = target.id
|
|
75
|
+
if not _is_constant_name(name):
|
|
76
|
+
return None
|
|
77
|
+
return ConstantInfo(name=name, line_number=lineno, value=_get_value_string(value))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_constant_name(name: str) -> bool:
|
|
81
|
+
"""Check if name matches constant naming convention."""
|
|
82
|
+
return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_value_string(value: ast.expr) -> str | None:
|
|
86
|
+
"""Get string representation of a value expression."""
|
|
87
|
+
if isinstance(value, ast.Constant):
|
|
88
|
+
return repr(value.value)
|
|
89
|
+
if isinstance(value, ast.Name):
|
|
90
|
+
return value.id
|
|
91
|
+
if isinstance(value, ast.Call):
|
|
92
|
+
return _call_to_string(value)
|
|
93
|
+
return CONTAINER_REPRESENTATIONS.get(type(value))
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _call_to_string(node: ast.Call) -> str:
|
|
97
|
+
"""Convert call expression to string."""
|
|
98
|
+
if isinstance(node.func, ast.Name):
|
|
99
|
+
return f"{node.func.id}(...)"
|
|
100
|
+
return "call(...)"
|