thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +38 -25
- src/core/base.py +7 -2
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +5 -2
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +120 -20
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +104 -10
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +54 -11
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +5 -4
- src/linters/dry/file_analyzer.py +4 -2
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +183 -48
- src/linters/dry/python_analyzer.py +60 -439
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/token_hasher.py +116 -112
- src/linters/dry/typescript_analyzer.py +68 -382
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +5 -4
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/atemporal_detector.py +68 -50
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +90 -16
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +36 -33
- src/linters/file_header/linter.py +140 -144
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +14 -58
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +13 -12
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +66 -34
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/context_analyzer.py +227 -225
- src/linters/magic_numbers/linter.py +28 -82
- src/linters/magic_numbers/python_analyzer.py +4 -16
- src/linters/magic_numbers/typescript_analyzer.py +9 -12
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/linter.py +24 -16
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -12
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/config.py +7 -12
- src/linters/print_statements/linter.py +26 -43
- src/linters/print_statements/python_analyzer.py +91 -93
- src/linters/print_statements/typescript_analyzer.py +15 -25
- src/linters/print_statements/violation_builder.py +12 -14
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +15 -16
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +252 -14
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +196 -0
- src/utils/project_root.py +3 -0
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1665
- thailint-0.5.0.dist-info/METADATA +0 -1286
- thailint-0.5.0.dist-info/RECORD +0 -96
- thailint-0.5.0.dist-info/entry_points.txt +0 -4
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Python AST-based string concatenation in loop detector
|
|
3
|
+
|
|
4
|
+
Scope: Detect O(n²) string building patterns using += in for/while loops
|
|
5
|
+
|
|
6
|
+
Overview: Analyzes Python code to detect string concatenation inside loops using AST traversal.
|
|
7
|
+
Implements heuristic-based detection using variable naming patterns and initialization values.
|
|
8
|
+
Detects `result += str(item)` patterns inside for/while loops that indicate O(n²) complexity.
|
|
9
|
+
Provides suggestions for using join() or list comprehension instead.
|
|
10
|
+
|
|
11
|
+
Dependencies: ast module for Python parsing, constants module for shared patterns
|
|
12
|
+
|
|
13
|
+
Exports: PythonStringConcatAnalyzer class with find_violations method
|
|
14
|
+
|
|
15
|
+
Interfaces: find_violations(tree: ast.AST) -> list[StringConcatViolation] with violation info
|
|
16
|
+
|
|
17
|
+
Implementation: AST visitor pattern detecting augmented assignments in loop contexts
|
|
18
|
+
|
|
19
|
+
Suppressions:
|
|
20
|
+
- srp.violation: Class uses many small methods to achieve A-grade cyclomatic complexity.
|
|
21
|
+
This is an intentional tradeoff - low complexity is prioritized over strict SRP adherence.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import ast
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
from .constants import STRING_VARIABLE_PATTERNS
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class StringConcatViolation:
    """A single ``+=`` string concatenation found inside a loop body."""

    # Name of the variable on the left-hand side of the ``+=``.
    variable_name: str
    # Line of the augmented assignment (taken from the AST node's ``lineno``).
    line_number: int
    # Column of the augmented assignment (taken from the AST node's ``col_offset``).
    column: int
    # Kind of the enclosing loop: 'for' or 'while'.
    loop_type: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# thailint: ignore-next-line[srp.violation] Uses small focused methods to reduce complexity
|
|
41
|
+
class PythonStringConcatAnalyzer:
    """Detects string concatenation in loops for Python code.

    Detection is heuristic and purely name-based: a first pass records which
    variable names are assigned string or clearly non-string values anywhere
    in the tree, and a second pass reports ``name += ...`` statements that
    execute inside a ``for``/``while`` body and look like string building.
    """

    # Definitions that open a new scope. A ``+=`` inside one of these is not
    # executed as part of the enclosing loop's body, so the loop context must
    # not be carried across the boundary.
    _SCOPE_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    def __init__(self) -> None:
        """Initialize the analyzer with empty variable classifications."""
        self._string_variables: set[str] = set()
        self._non_string_variables: set[str] = set()  # Lists, numbers, etc.

    def find_violations(self, tree: ast.AST) -> list[StringConcatViolation]:
        """Find all string concatenation in loop violations.

        Args:
            tree: Python AST to analyze

        Returns:
            List of violations found, in traversal order
        """
        violations: list[StringConcatViolation] = []
        # State is rebuilt on every call so one analyzer can be reused.
        self._string_variables = set()
        self._non_string_variables = set()

        # First pass: identify variables initialized as strings or non-strings
        self._identify_string_variables(tree)

        # Second pass: find += in loops
        self._find_concat_in_loops(tree, violations)

        return violations

    def _identify_string_variables(self, tree: ast.AST) -> None:
        """Identify variables that are initialized as strings or non-strings.

        Args:
            tree: AST to analyze
        """
        for node in ast.walk(tree):
            self._process_assignment_node(node)

    def _process_assignment_node(self, node: ast.AST) -> None:
        """Process a single assignment node to track variable types."""
        if isinstance(node, ast.Assign):
            self._process_simple_assign(node)
        elif isinstance(node, ast.AnnAssign):
            self._process_annotated_assign(node)

    def _process_simple_assign(self, node: ast.Assign) -> None:
        """Classify every plain-name target of a simple assignment."""
        for target in node.targets:
            if isinstance(target, ast.Name):
                self._classify_variable(target.id, node.value)

    def _process_annotated_assign(self, node: ast.AnnAssign) -> None:
        """Classify the target of an annotated assignment that has a value."""
        if node.value and isinstance(node.target, ast.Name):
            self._classify_variable(node.target.id, node.value)

    def _classify_variable(self, var_name: str, value: ast.expr) -> None:
        """Classify a variable as string or non-string based on its value."""
        if self._is_string_value(value):
            self._string_variables.add(var_name)
        elif self._is_non_string_value(value):
            self._non_string_variables.add(var_name)

    def _is_string_value(self, node: ast.expr) -> bool:
        """Check if an expression is a string value.

        Args:
            node: Expression node to check

        Returns:
            True if the expression is a string literal or f-string
        """
        # String literal: "", '', """..."""
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return True

        # f-string: f"..."
        return isinstance(node, ast.JoinedStr)

    def _is_non_string_value(self, node: ast.expr) -> bool:
        """Check if an expression is clearly not a string (list, number, etc).

        Args:
            node: Expression node to check

        Returns:
            True if the expression is clearly not a string
        """
        # Collection displays: [], {}, {1, 2}  (a bare set() call is an ast.Call
        # and is therefore not matched here)
        if isinstance(node, (ast.List, ast.Dict, ast.Set)):
            return True
        # Numeric literal: 0, 1.0
        return isinstance(node, ast.Constant) and isinstance(node.value, (int, float))

    def _find_concat_in_loops(
        self,
        node: ast.AST,
        violations: list[StringConcatViolation],
        in_loop: str | None = None,
        reset_vars: set[str] | None = None,
    ) -> None:
        """Recursively find string concatenation in loops.

        Args:
            node: Current AST node
            violations: List to append violations to
            in_loop: Type of enclosing loop ('for' or 'while'), None if not in loop
            reset_vars: Variables reset to string values in current loop body
        """
        if reset_vars is None:
            reset_vars = set()

        current_loop, current_reset_vars = self._resolve_loop_context(node, in_loop, reset_vars)

        self._check_for_string_concat(node, violations, current_loop, current_reset_vars)

        # Statements of a loop's else clause run at most once, after the loop,
        # so they are visited with the context that surrounds the loop.
        else_ids = self._loop_else_ids(node)
        for child in ast.iter_child_nodes(node):
            if id(child) in else_ids:
                self._find_concat_in_loops(child, violations, in_loop, reset_vars)
            else:
                self._find_concat_in_loops(child, violations, current_loop, current_reset_vars)

    def _resolve_loop_context(
        self,
        node: ast.AST,
        in_loop: str | None,
        reset_vars: set[str],
    ) -> tuple[str | None, set[str]]:
        """Compute the loop context that applies to the children of ``node``.

        Args:
            node: Current AST node
            in_loop: Loop type inherited from the parent, or None
            reset_vars: Reset variables inherited from the parent

        Returns:
            Tuple of (loop type or None, variables reset in that loop body)
        """
        # A nested def/class starts a fresh scope: its body is not part of the
        # enclosing loop's per-iteration statements.
        if isinstance(node, self._SCOPE_NODES):
            return None, set()

        loop_type = self._get_loop_type(node)
        if loop_type:
            # Entering a new loop replaces the reset set with this loop's own.
            return loop_type, self._find_vars_reset_in_loop(node)

        return in_loop, reset_vars

    def _loop_else_ids(self, node: ast.AST) -> set[int]:
        """Return the ids of the statements in a loop's else clause, if any."""
        if isinstance(node, (ast.For, ast.While)):
            return {id(stmt) for stmt in node.orelse}
        return set()

    def _get_loop_type(self, node: ast.AST) -> str | None:
        """Get the loop type if node is a loop, else None."""
        if isinstance(node, ast.For):
            return "for"
        if isinstance(node, ast.While):
            return "while"
        return None

    def _find_vars_reset_in_loop(self, loop_node: ast.AST) -> set[str]:
        """Find variables that are assigned to string values in a loop body.

        These variables are "reset" each iteration and should not be flagged
        for O(n²) string concatenation since they don't accumulate across iterations.

        Args:
            loop_node: A For or While loop AST node

        Returns:
            Set of variable names that are reset to strings in the loop body
        """
        reset_vars: set[str] = set()

        if not isinstance(loop_node, (ast.For, ast.While)):
            return reset_vars

        # Scan assignments in the loop body (not nested loops)
        for stmt in loop_node.body:
            self._collect_string_assigns(stmt, reset_vars)

        return reset_vars

    def _collect_string_assigns(self, node: ast.AST, reset_vars: set[str]) -> None:
        """Collect variable names assigned to string values in a node.

        Args:
            node: AST node to scan
            reset_vars: Set to add found variable names to
        """
        self._check_simple_assign(node, reset_vars)
        self._check_annotated_assign(node, reset_vars)
        self._recurse_control_flow(node, reset_vars)

    def _check_simple_assign(self, node: ast.AST, reset_vars: set[str]) -> None:
        """Check if node is a simple assignment to a string value."""
        if not isinstance(node, ast.Assign):
            return
        for target in node.targets:
            if isinstance(target, ast.Name) and self._is_string_value(node.value):
                reset_vars.add(target.id)

    def _check_annotated_assign(self, node: ast.AST, reset_vars: set[str]) -> None:
        """Check if node is an annotated assignment to a string value."""
        if not isinstance(node, ast.AnnAssign):
            return
        if node.value and isinstance(node.target, ast.Name) and self._is_string_value(node.value):
            reset_vars.add(node.target.id)

    def _recurse_control_flow(self, node: ast.AST, reset_vars: set[str]) -> None:
        """Recurse into if/else, try/except and with blocks but NOT into nested loops."""
        if isinstance(node, ast.If):
            self._recurse_if_node(node, reset_vars)
        elif isinstance(node, ast.Try):
            self._recurse_try_node(node, reset_vars)
        elif isinstance(node, (ast.With, ast.AsyncWith)):
            self._recurse_with_node(node, reset_vars)

    def _recurse_if_node(self, node: ast.If, reset_vars: set[str]) -> None:
        """Recurse into if/else branches."""
        for stmt in node.body + node.orelse:
            self._collect_string_assigns(stmt, reset_vars)

    def _recurse_try_node(self, node: ast.Try, reset_vars: set[str]) -> None:
        """Recurse into try/except/else/finally branches."""
        for stmt in node.body + node.orelse + node.finalbody:
            self._collect_string_assigns(stmt, reset_vars)
        for handler in node.handlers:
            for stmt in handler.body:
                self._collect_string_assigns(stmt, reset_vars)

    def _recurse_with_node(self, node: ast.With | ast.AsyncWith, reset_vars: set[str]) -> None:
        """Recurse into the body of a with block.

        The body of a with statement runs as part of the enclosing loop
        iteration, so a string assignment there resets the variable just like
        one written directly in the loop body.
        """
        for stmt in node.body:
            self._collect_string_assigns(stmt, reset_vars)

    def _check_for_string_concat(
        self,
        node: ast.AST,
        violations: list[StringConcatViolation],
        loop_type: str | None,
        reset_vars: set[str] | None = None,
    ) -> None:
        """Check if node is a string concatenation in a loop and add violation if so."""
        if not self._is_add_aug_assign_in_loop(node, loop_type):
            return
        self._process_aug_assign(node, violations, loop_type or "", reset_vars)

    def _process_aug_assign(
        self,
        node: ast.AST,
        violations: list[StringConcatViolation],
        loop_type: str,
        reset_vars: set[str] | None,
    ) -> None:
        """Process an augmented assignment node for potential violations."""
        # Re-checked here so the attribute accesses below are type-safe even
        # when this method is called directly.
        if not isinstance(node, ast.AugAssign) or not isinstance(node.target, ast.Name):
            return
        var_name = node.target.id
        if self._should_skip_reset_var(var_name, reset_vars):
            return
        self._add_string_concat_violation(node, var_name, loop_type, violations)

    def _should_skip_reset_var(self, var_name: str, reset_vars: set[str] | None) -> bool:
        """Check if variable is reset in the loop and should be skipped."""
        return reset_vars is not None and var_name in reset_vars

    def _is_add_aug_assign_in_loop(self, node: ast.AST, loop_type: str | None) -> bool:
        """Check if node is a += augmented assignment to a plain name in a loop."""
        if not loop_type or not isinstance(node, ast.AugAssign):
            return False
        return isinstance(node.op, ast.Add) and isinstance(node.target, ast.Name)

    def _add_string_concat_violation(
        self,
        node: ast.AugAssign,
        var_name: str,
        loop_type: str,
        violations: list[StringConcatViolation],
    ) -> None:
        """Add violation if variable is likely a string."""
        if not self._is_likely_string_variable(var_name, node.value):
            return
        violations.append(
            StringConcatViolation(
                variable_name=var_name,
                line_number=node.lineno,
                column=node.col_offset,
                loop_type=loop_type,
            )
        )

    def _is_likely_string_variable(self, var_name: str, value: ast.expr) -> bool:
        """Determine if a variable is likely a string being concatenated.

        Args:
            var_name: Variable name
            value: Value being added

        Returns:
            True if this is likely string concatenation
        """
        # A name seen with a list/dict/set/number initializer is never reported,
        # regardless of what is being added to it.
        if var_name in self._non_string_variables:
            return False
        return (
            self._is_known_string_var(var_name)
            or self._is_string_value(value)
            or self._is_str_call(value)
            or self._is_string_binop(value)
        )

    def _is_known_string_var(self, var_name: str) -> bool:
        """Check if variable is known or named like a string."""
        return var_name in self._string_variables or var_name.lower() in STRING_VARIABLE_PATTERNS

    def _is_str_call(self, value: ast.expr) -> bool:
        """Check if value is a str() call."""
        if not isinstance(value, ast.Call):
            return False
        return isinstance(value.func, ast.Name) and value.func.id == "str"

    def _is_string_binop(self, value: ast.expr) -> bool:
        """Check if value is a ``+`` binary op with a string literal operand."""
        if not isinstance(value, ast.BinOp) or not isinstance(value.op, ast.Add):
            return False
        return self._is_string_value(value.left) or self._is_string_value(value.right)

    def deduplicate_violations(
        self, violations: list[StringConcatViolation]
    ) -> list[StringConcatViolation]:
        """Deduplicate violations so each variable name is reported once.

        Only the variable name is used as the key: the first violation for a
        name is kept and every later violation with the same name is dropped,
        including ones that come from a different loop.

        Args:
            violations: List of all violations found

        Returns:
            Deduplicated list with the first violation per variable name,
            in the original order
        """
        seen: set[str] = set()
        result: list[StringConcatViolation] = []

        for v in violations:
            if v.variable_name not in seen:
                seen.add(v.variable_name)
                result.append(v)

        return result
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Python AST-based regex compilation in loop detector
|
|
3
|
+
|
|
4
|
+
Scope: Detect repeated regex compilation patterns using re.method() in for/while loops
|
|
5
|
+
|
|
6
|
+
Overview: Analyzes Python code to detect regex function calls inside loops using AST traversal.
|
|
7
|
+
Detects calls to re.match(), re.search(), re.sub(), re.subn(), re.findall(), re.finditer(), re.split(), and
|
|
8
|
+
re.fullmatch() inside for/while loops. Tracks variables assigned from re.compile() to
|
|
9
|
+
avoid false positives when compiled patterns are correctly used. Supports import
|
|
10
|
+
variations including 'import re', 'from re import match', and 'import re as alias'.
|
|
11
|
+
|
|
12
|
+
Dependencies: ast module for Python parsing
|
|
13
|
+
|
|
14
|
+
Exports: PythonRegexInLoopAnalyzer class with find_violations method
|
|
15
|
+
|
|
16
|
+
Interfaces: find_violations(tree: ast.AST) -> list[RegexInLoopViolation]
|
|
17
|
+
|
|
18
|
+
Implementation: AST visitor pattern detecting regex calls in loop contexts with compiled
|
|
19
|
+
pattern tracking
|
|
20
|
+
|
|
21
|
+
Suppressions:
|
|
22
|
+
- srp.violation: Class uses many small methods to achieve A-grade cyclomatic complexity.
|
|
23
|
+
This is an intentional tradeoff - low complexity is prioritized over strict SRP adherence.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import ast
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
|
|
29
|
+
# Names of the module-level ``re`` functions this analyzer looks for. Each one
# takes the pattern as an argument on every call, as opposed to a method on a
# pattern object obtained once from re.compile().
RE_FUNCTIONS = frozenset(
    (
        "match",
        "fullmatch",
        "search",
        "sub",
        "subn",
        "split",
        "findall",
        "finditer",
    )
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class RegexInLoopViolation:
    """A single regex function call found inside a loop body."""

    # Name of the regex function that was called.
    method_name: str
    # Line on which the call was found.
    line_number: int
    # Column at which the call was found.
    column: int
    # Kind of the enclosing loop: 'for' or 'while'.
    loop_type: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# thailint: ignore-next-line[srp.violation] Uses small focused methods to reduce complexity
|
|
55
|
+
class PythonRegexInLoopAnalyzer:
|
|
56
|
+
"""Detects regex function calls in loops for Python code."""
|
|
57
|
+
|
|
58
|
+
def __init__(self) -> None:
|
|
59
|
+
"""Initialize the analyzer."""
|
|
60
|
+
self._compiled_patterns: set[str] = set()
|
|
61
|
+
self._re_aliases: set[str] = set() # Module aliases like 'regex' from 'import re as regex'
|
|
62
|
+
self._direct_imports: set[str] = set() # Direct imports like 'match' from 'from re import'
|
|
63
|
+
|
|
64
|
+
def find_violations(self, tree: ast.AST) -> list[RegexInLoopViolation]:
    """Collect every regex-in-loop violation in ``tree``.

    Args:
        tree: Python AST to analyze

    Returns:
        List of violations found
    """
    # Start from a clean slate so the analyzer can be reused across files.
    # 're' is always accepted as a module name, even without a visible import.
    self._direct_imports = set()
    self._re_aliases = {"re"}
    self._compiled_patterns = set()

    # Pass 1: imports first, because recognising compile() calls depends on
    # the set of module aliases gathered from them.
    self._identify_imports(tree)
    self._identify_compiled_patterns(tree)

    # Pass 2: walk the tree again and record regex calls made inside loops.
    found: list[RegexInLoopViolation] = []
    self._find_regex_in_loops(tree, found)
    return found
|
|
86
|
+
|
|
87
|
+
def _identify_imports(self, tree: ast.AST) -> None:
|
|
88
|
+
"""Identify re module imports and aliases.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
tree: AST to analyze
|
|
92
|
+
"""
|
|
93
|
+
for node in ast.walk(tree):
|
|
94
|
+
self._process_import_node(node)
|
|
95
|
+
|
|
96
|
+
def _process_import_node(self, node: ast.AST) -> None:
|
|
97
|
+
"""Process a single import node."""
|
|
98
|
+
if isinstance(node, ast.Import):
|
|
99
|
+
self._process_regular_import(node)
|
|
100
|
+
elif isinstance(node, ast.ImportFrom):
|
|
101
|
+
self._process_from_import(node)
|
|
102
|
+
|
|
103
|
+
def _process_regular_import(self, node: ast.Import) -> None:
|
|
104
|
+
"""Process 'import re' or 'import re as regex' style imports."""
|
|
105
|
+
for alias in node.names:
|
|
106
|
+
if alias.name == "re":
|
|
107
|
+
# import re as regex -> add 'regex' as valid alias
|
|
108
|
+
self._re_aliases.add(alias.asname or "re")
|
|
109
|
+
|
|
110
|
+
def _process_from_import(self, node: ast.ImportFrom) -> None:
|
|
111
|
+
"""Process 'from re import match' style imports."""
|
|
112
|
+
if node.module != "re":
|
|
113
|
+
return
|
|
114
|
+
for alias in node.names:
|
|
115
|
+
self._add_direct_import_if_re_function(alias)
|
|
116
|
+
|
|
117
|
+
def _add_direct_import_if_re_function(self, alias: ast.alias) -> None:
    """Record the local name of an imported symbol when it is a tracked regex function."""
    if alias.name in RE_FUNCTIONS:
        # 'from re import match as m' binds 'm'; without 'as' the name is kept.
        self._direct_imports.add(alias.asname or alias.name)
|
|
123
|
+
|
|
124
|
+
def _identify_compiled_patterns(self, tree: ast.AST) -> None:
|
|
125
|
+
"""Identify variables assigned from re.compile().
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
tree: AST to analyze
|
|
129
|
+
"""
|
|
130
|
+
for node in ast.walk(tree):
|
|
131
|
+
self._check_for_compile_assignment(node)
|
|
132
|
+
|
|
133
|
+
def _check_for_compile_assignment(self, node: ast.AST) -> None:
|
|
134
|
+
"""Check if node is an assignment from re.compile()."""
|
|
135
|
+
if isinstance(node, ast.Assign):
|
|
136
|
+
self._process_compile_assign(node)
|
|
137
|
+
elif isinstance(node, ast.AnnAssign):
|
|
138
|
+
self._process_compile_ann_assign(node)
|
|
139
|
+
|
|
140
|
+
def _process_compile_assign(self, node: ast.Assign) -> None:
|
|
141
|
+
"""Process simple assignment for re.compile()."""
|
|
142
|
+
if not self._is_re_compile_call(node.value):
|
|
143
|
+
return
|
|
144
|
+
for target in node.targets:
|
|
145
|
+
self._add_compiled_pattern_if_name(target)
|
|
146
|
+
|
|
147
|
+
def _add_compiled_pattern_if_name(self, target: ast.expr) -> None:
|
|
148
|
+
"""Add target to compiled patterns if it's a Name node."""
|
|
149
|
+
if isinstance(target, ast.Name):
|
|
150
|
+
self._compiled_patterns.add(target.id)
|
|
151
|
+
|
|
152
|
+
def _process_compile_ann_assign(self, node: ast.AnnAssign) -> None:
|
|
153
|
+
"""Process annotated assignment for re.compile()."""
|
|
154
|
+
if node.value and self._is_re_compile_call(node.value):
|
|
155
|
+
if isinstance(node.target, ast.Name):
|
|
156
|
+
self._compiled_patterns.add(node.target.id)
|
|
157
|
+
|
|
158
|
+
def _is_re_compile_call(self, node: ast.expr) -> bool:
|
|
159
|
+
"""Check if expression is a call to re.compile().
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
node: Expression node to check
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
True if this is a re.compile() call
|
|
166
|
+
"""
|
|
167
|
+
if not isinstance(node, ast.Call):
|
|
168
|
+
return False
|
|
169
|
+
|
|
170
|
+
func = node.func
|
|
171
|
+
|
|
172
|
+
# re.compile() style
|
|
173
|
+
if isinstance(func, ast.Attribute):
|
|
174
|
+
return self._is_module_compile(func)
|
|
175
|
+
|
|
176
|
+
return False
|
|
177
|
+
|
|
178
|
+
def _is_module_compile(self, func: ast.Attribute) -> bool:
|
|
179
|
+
"""Check if attribute access is module.compile()."""
|
|
180
|
+
if func.attr != "compile":
|
|
181
|
+
return False
|
|
182
|
+
if isinstance(func.value, ast.Name):
|
|
183
|
+
return func.value.id in self._re_aliases
|
|
184
|
+
return False
|
|
185
|
+
|
|
186
|
+
def _find_regex_in_loops(
    self,
    node: ast.AST,
    violations: list[RegexInLoopViolation],
    in_loop: str | None = None,
) -> None:
    """Recursively find regex calls that execute on every loop iteration.

    Only the parts of a loop statement that are re-executed per iteration
    are treated as being inside that loop: its body and, for ``while``
    loops, its test expression. The iterable and target of a ``for`` loop
    and the ``else`` clause of either loop run at most once, so they keep
    the context of the enclosing loop (if any). This avoids reporting
    ``for m in re.finditer(p, s): ...`` as a regex call inside a loop.

    Args:
        node: Current AST node
        violations: List to append violations to
        in_loop: Type of enclosing loop ('for' or 'while'), None if not in loop
    """
    loop_type = self._get_loop_type(node)

    if loop_type is None:
        # Ordinary node: it and all of its children share the outer context.
        self._check_for_regex_call(node, violations, in_loop)
        for child in ast.iter_child_nodes(node):
            self._find_regex_in_loops(child, violations, in_loop)
        return

    # Collect the direct children that run once per iteration of this loop.
    per_iteration = list(getattr(node, "body", ()))
    if isinstance(node, ast.While):
        per_iteration.append(node.test)
    per_iteration_ids = {id(part) for part in per_iteration}

    # Walk children in source order so violations keep their original order.
    for child in ast.iter_child_nodes(node):
        context = loop_type if id(child) in per_iteration_ids else in_loop
        self._find_regex_in_loops(child, violations, context)
|
|
204
|
+
|
|
205
|
+
def _get_loop_type(self, node: ast.AST) -> str | None:
|
|
206
|
+
"""Get the loop type if node is a loop, else None."""
|
|
207
|
+
if isinstance(node, ast.For):
|
|
208
|
+
return "for"
|
|
209
|
+
if isinstance(node, ast.While):
|
|
210
|
+
return "while"
|
|
211
|
+
return None
|
|
212
|
+
|
|
213
|
+
def _check_for_regex_call(
    self,
    node: ast.AST,
    violations: list[RegexInLoopViolation],
    loop_type: str | None,
) -> None:
    """Append a violation when the node is a regex call inside a loop.

    Args:
        node: AST node to examine
        violations: List to append violations to
        loop_type: Type of enclosing loop, or None when not inside a loop
    """
    if loop_type and isinstance(node, ast.Call):
        found = self._create_violation_if_regex_call(node, loop_type)
        if found:
            violations.append(found)
|
|
226
|
+
|
|
227
|
+
def _create_violation_if_regex_call(
    self,
    node: ast.Call,
    loop_type: str,
) -> RegexInLoopViolation | None:
    """Build a violation record for an uncompiled regex call.

    Args:
        node: Call node to examine
        loop_type: Type of enclosing loop

    Returns:
        A violation carrying the call's method name and position when the
        call is an uncompiled regex call, None otherwise
    """
    regex_method = self._get_regex_method_name(node)
    if not regex_method:
        return None
    return RegexInLoopViolation(
        method_name=regex_method,
        line_number=node.lineno,
        column=node.col_offset,
        loop_type=loop_type,
    )
|
|
250
|
+
|
|
251
|
+
def _get_regex_method_name(self, node: ast.Call) -> str | None:
|
|
252
|
+
"""Get regex method name if this is an uncompiled regex call.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
node: Call node to check
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
Method name (e.g., 'match', 'search') if uncompiled regex call, None otherwise
|
|
259
|
+
"""
|
|
260
|
+
func = node.func
|
|
261
|
+
|
|
262
|
+
# re.match() style or regex.match() style
|
|
263
|
+
if isinstance(func, ast.Attribute):
|
|
264
|
+
return self._check_module_regex_call(func)
|
|
265
|
+
|
|
266
|
+
# Direct import: match() from 'from re import match'
|
|
267
|
+
if isinstance(func, ast.Name):
|
|
268
|
+
return self._check_direct_import_call(func)
|
|
269
|
+
|
|
270
|
+
return None
|
|
271
|
+
|
|
272
|
+
def _check_module_regex_call(self, func: ast.Attribute) -> str | None:
    """Classify an attribute-style call such as ``re.match(...)``.

    Args:
        func: Attribute node used as the callee (e.g., re.match)

    Returns:
        ``re.<method>`` when the method is in ``RE_FUNCTIONS`` and is called
        on a recorded ``re`` alias that is not a compiled-pattern variable,
        None otherwise
    """
    owner = func.value

    # Ignore methods we do not track and callers that are not bare names.
    if func.attr not in RE_FUNCTIONS or not isinstance(owner, ast.Name):
        return None

    # pattern.match() on a compiled pattern variable is fine.
    if owner.id in self._compiled_patterns:
        return None

    # re.match() on the module (or one of its aliases) is the violation.
    called_on_module = owner.id in self._re_aliases
    return f"re.{func.attr}" if called_on_module else None
|
|
300
|
+
|
|
301
|
+
def _check_direct_import_call(self, func: ast.Name) -> str | None:
|
|
302
|
+
"""Check if this is a directly imported regex function call.
|
|
303
|
+
|
|
304
|
+
Args:
|
|
305
|
+
func: Name node (e.g., match)
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
Method name if directly imported regex function, None otherwise
|
|
309
|
+
"""
|
|
310
|
+
if func.id in self._direct_imports:
|
|
311
|
+
return func.id
|
|
312
|
+
return None
|