thailint 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/cli/__init__.py +27 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +478 -0
- src/cli/linters/__init__.py +58 -0
- src/cli/linters/code_patterns.py +372 -0
- src/cli/linters/code_smells.py +343 -0
- src/cli/linters/documentation.py +155 -0
- src/cli/linters/shared.py +89 -0
- src/cli/linters/structure.py +313 -0
- src/cli/linters/structure_quality.py +316 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +375 -0
- src/cli_main.py +34 -0
- src/config.py +2 -3
- src/core/rule_discovery.py +43 -10
- src/core/types.py +13 -0
- src/core/violation_utils.py +69 -0
- src/linter_config/ignore.py +32 -16
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/config.py +63 -0
- src/linters/collection_pipeline/continue_analyzer.py +100 -0
- src/linters/collection_pipeline/detector.py +130 -0
- src/linters/collection_pipeline/linter.py +437 -0
- src/linters/collection_pipeline/suggestion_builder.py +63 -0
- src/linters/dry/block_filter.py +99 -9
- src/linters/dry/cache.py +94 -6
- src/linters/dry/config.py +47 -10
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +214 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/linter.py +89 -48
- src/linters/dry/python_analyzer.py +44 -431
- src/linters/dry/python_constant_extractor.py +101 -0
- src/linters/dry/single_statement_detector.py +415 -0
- src/linters/dry/token_hasher.py +5 -5
- src/linters/dry/typescript_analyzer.py +63 -382
- src/linters/dry/typescript_constant_extractor.py +134 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +66 -0
- src/linters/file_header/linter.py +9 -13
- src/linters/file_placement/linter.py +30 -10
- src/linters/file_placement/pattern_matcher.py +19 -5
- src/linters/magic_numbers/linter.py +8 -67
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/nesting/linter.py +12 -9
- src/linters/print_statements/linter.py +7 -24
- src/linters/srp/class_analyzer.py +9 -9
- src/linters/srp/heuristics.py +6 -5
- src/linters/srp/linter.py +4 -5
- src/linters/stateless_class/linter.py +2 -2
- src/linters/stringly_typed/__init__.py +23 -0
- src/linters/stringly_typed/config.py +165 -0
- src/linters/stringly_typed/python/__init__.py +29 -0
- src/linters/stringly_typed/python/analyzer.py +198 -0
- src/linters/stringly_typed/python/condition_extractor.py +131 -0
- src/linters/stringly_typed/python/conditional_detector.py +176 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +88 -0
- src/linters/stringly_typed/python/validation_detector.py +186 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/orchestrator/core.py +241 -12
- {thailint-0.9.0.dist-info → thailint-0.11.0.dist-info}/METADATA +116 -3
- {thailint-0.9.0.dist-info → thailint-0.11.0.dist-info}/RECORD +67 -29
- thailint-0.11.0.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -2014
- thailint-0.9.0.dist-info/entry_points.txt +0 -4
- {thailint-0.9.0.dist-info → thailint-0.11.0.dist-info}/WHEEL +0 -0
- {thailint-0.9.0.dist-info → thailint-0.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Extract string comparisons from Python condition expressions
|
|
3
|
+
|
|
4
|
+
Scope: Parse BoolOp and Compare nodes to extract string equality patterns
|
|
5
|
+
|
|
6
|
+
Overview: Provides functions to extract string comparisons from condition expressions
|
|
7
|
+
in Python AST. Handles simple comparisons, or-combined, and and-combined
|
|
8
|
+
conditions. Updates a collector object with extracted variable names and
|
|
9
|
+
string values. Separated from main detector to reduce complexity.
|
|
10
|
+
|
|
11
|
+
Dependencies: ast module, variable_extractor
|
|
12
|
+
|
|
13
|
+
Exports: extract_from_condition (helpers _is_simple_equality and _get_string_constant are module-private)
|
|
14
|
+
|
|
15
|
+
Interfaces: Functions for extracting string comparisons from AST nodes
|
|
16
|
+
|
|
17
|
+
Implementation: Recursive traversal of BoolOp nodes with Compare extraction
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import ast
|
|
21
|
+
|
|
22
|
+
from .variable_extractor import extract_variable_name
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def extract_from_condition(
    test: ast.expr,
    collector: object,
) -> None:
    """Record the string comparisons found in an if/elif test expression.

    A bare comparison is inspected directly; an ``and``/``or`` expression has
    each of its operands inspected. Any other kind of expression is ignored.

    Args:
        test: The test expression from an if/elif
        collector: Object that results are accumulated into (must expose
            ``variable_name`` and ``string_values`` attributes)
    """
    if isinstance(test, ast.Compare):
        _extract_from_compare(test, collector)
        return

    if isinstance(test, ast.BoolOp):
        _extract_from_bool_op(test, collector)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _extract_from_bool_op(node: ast.BoolOp, collector: object) -> None:
|
|
45
|
+
"""Extract from BoolOp (And/Or combined comparisons).
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
node: BoolOp node
|
|
49
|
+
collector: Collector to accumulate results into
|
|
50
|
+
"""
|
|
51
|
+
for value in node.values:
|
|
52
|
+
_handle_bool_op_value(value, collector)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _handle_bool_op_value(value: ast.expr, collector: object) -> None:
|
|
56
|
+
"""Handle a single value from a BoolOp node.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
value: Expression value from BoolOp
|
|
60
|
+
collector: Collector to accumulate results into
|
|
61
|
+
"""
|
|
62
|
+
if isinstance(value, ast.Compare):
|
|
63
|
+
_extract_from_compare(value, collector)
|
|
64
|
+
elif isinstance(value, ast.BoolOp):
|
|
65
|
+
_extract_from_bool_op(value, collector)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _extract_from_compare(node: ast.Compare, collector: object) -> None:
    """Record the string literal of a single ``==``/``!=`` comparison.

    Nothing is recorded unless the node is a one-operator equality test whose
    right-hand side is a string constant. The variable name is taken from the
    left-hand side of the comparison.

    Args:
        node: Compare node to analyze
        collector: Object that results are accumulated into
    """
    if _is_simple_equality(node):
        literal = _get_string_constant(node)
        if literal is not None:
            _update_collector(collector, extract_variable_name(node.left), literal)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _is_simple_equality(node: ast.Compare) -> bool:
|
|
87
|
+
"""Check if Compare is a simple equality with one operator.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
node: Compare node to check
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
True if it's a simple x == y or x != y comparison
|
|
94
|
+
"""
|
|
95
|
+
if len(node.ops) != 1:
|
|
96
|
+
return False
|
|
97
|
+
return isinstance(node.ops[0], (ast.Eq, ast.NotEq))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _get_string_constant(node: ast.Compare) -> str | None:
|
|
101
|
+
"""Get string constant from the right side of comparison.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
node: Compare node to extract from
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
String value if comparator is a string constant, None otherwise
|
|
108
|
+
"""
|
|
109
|
+
comparator = node.comparators[0]
|
|
110
|
+
if isinstance(comparator, ast.Constant) and isinstance(comparator.value, str):
|
|
111
|
+
return comparator.value
|
|
112
|
+
return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _update_collector(
|
|
116
|
+
collector: object,
|
|
117
|
+
var_name: str | None,
|
|
118
|
+
string_value: str,
|
|
119
|
+
) -> None:
|
|
120
|
+
"""Update collector with extracted variable and value.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
collector: Collector to update
|
|
124
|
+
var_name: Variable name from comparison
|
|
125
|
+
string_value: String value from comparison
|
|
126
|
+
"""
|
|
127
|
+
if collector.variable_name is None: # type: ignore[attr-defined]
|
|
128
|
+
collector.variable_name = var_name # type: ignore[attr-defined]
|
|
129
|
+
# Only add if same variable (or no variable tracking)
|
|
130
|
+
if collector.variable_name == var_name or var_name is None: # type: ignore[attr-defined]
|
|
131
|
+
collector.string_values.add(string_value) # type: ignore[attr-defined]
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Detect equality chain patterns in Python AST
|
|
3
|
+
|
|
4
|
+
Scope: Find 'if x == "a" elif x == "b"', or-combined, and match statement patterns
|
|
5
|
+
|
|
6
|
+
Overview: Provides ConditionalPatternDetector class that traverses Python AST to find
|
|
7
|
+
equality chain patterns where strings are used instead of enums. Detects single
|
|
8
|
+
equality comparisons with string constants, aggregates values from if/elif chains,
|
|
9
|
+
handles or-combined comparisons, and supports Python 3.10+ match statements.
|
|
10
|
+
Returns structured EqualityChainPattern dataclass instances with aggregated
|
|
11
|
+
string values, pattern type, location, and optional variable name.
|
|
12
|
+
|
|
13
|
+
Dependencies: ast module for AST parsing, dataclasses for pattern structure,
|
|
14
|
+
condition_extractor for comparison extraction, match_analyzer for match statements
|
|
15
|
+
|
|
16
|
+
Exports: ConditionalPatternDetector class, EqualityChainPattern dataclass
|
|
17
|
+
|
|
18
|
+
Interfaces: ConditionalPatternDetector.find_patterns(tree) -> list[EqualityChainPattern]
|
|
19
|
+
|
|
20
|
+
Implementation: AST NodeVisitor pattern with If node chain traversal and Match statement handling
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import ast
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from typing import TYPE_CHECKING
|
|
26
|
+
|
|
27
|
+
from .condition_extractor import extract_from_condition
|
|
28
|
+
from .constants import MIN_VALUES_FOR_PATTERN
|
|
29
|
+
from .match_analyzer import analyze_match_statement
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from collections.abc import Iterator
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class EqualityChainPattern:
|
|
37
|
+
"""Represents a detected equality chain pattern.
|
|
38
|
+
|
|
39
|
+
Captures information about stringly-typed equality checks including aggregated
|
|
40
|
+
string values from chains, pattern type, source location, and variable name.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
string_values: set[str]
|
|
44
|
+
"""Set of string values aggregated from the equality chain."""
|
|
45
|
+
|
|
46
|
+
pattern_type: str
|
|
47
|
+
"""Type of pattern: 'equality_chain', 'or_combined', or 'match_statement'."""
|
|
48
|
+
|
|
49
|
+
line_number: int
|
|
50
|
+
"""Line number where the pattern starts (1-indexed)."""
|
|
51
|
+
|
|
52
|
+
column: int
|
|
53
|
+
"""Column number where the pattern starts (0-indexed)."""
|
|
54
|
+
|
|
55
|
+
variable_name: str | None
|
|
56
|
+
"""Variable name being compared, if identifiable from a simple expression."""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
|
|
60
|
+
class _ChainCollector:
|
|
61
|
+
"""Internal collector for aggregating values from if/elif chains."""
|
|
62
|
+
|
|
63
|
+
variable_name: str | None = None
|
|
64
|
+
string_values: set[str] = field(default_factory=set)
|
|
65
|
+
line_number: int = 0
|
|
66
|
+
column: int = 0
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ConditionalPatternDetector(ast.NodeVisitor):
    """AST visitor that collects string-equality chains and string match statements.

    Inspects every ``if``/``elif`` chain and every ``match`` statement in a
    tree and records an EqualityChainPattern for each construct that yields
    at least MIN_VALUES_FOR_PATTERN string values.
    """

    def __init__(self) -> None:
        """Create a detector with no results and no visited chains."""
        self.patterns: list[EqualityChainPattern] = []
        # id()s of If nodes already consumed as part of a chain, so that an
        # elif branch is not analyzed again as the head of its own chain.
        self._processed_if_nodes: set[int] = set()

    def find_patterns(self, tree: ast.AST) -> list[EqualityChainPattern]:
        """Run the detector over a tree and return everything it found.

        State left over from a previous run is discarded first.

        Args:
            tree: The AST to analyze

        Returns:
            List of EqualityChainPattern instances, one per detected pattern
        """
        self.patterns, self._processed_if_nodes = [], set()
        self.visit(tree)
        return self.patterns

    def visit_If(self, node: ast.If) -> None:  # pylint: disable=invalid-name
        """Analyze the chain headed by this If node, then descend into it.

        Args:
            node: The If node to analyze
        """
        already_handled = id(node) in self._processed_if_nodes
        if not already_handled:
            self._analyze_if_chain(node)
        self.generic_visit(node)

    def visit_Match(self, node: ast.Match) -> None:  # pylint: disable=invalid-name
        """Record a pattern for a match statement with string cases, then descend.

        Args:
            node: The Match node to analyze
        """
        found = analyze_match_statement(node, EqualityChainPattern)
        if found is not None:
            self.patterns.append(found)  # type: ignore[arg-type]
        self.generic_visit(node)

    def _analyze_if_chain(self, node: ast.If) -> None:
        """Aggregate the comparisons of a whole if/elif chain into one pattern.

        Args:
            node: The starting If node of the chain
        """
        collector = _ChainCollector(line_number=node.lineno, column=node.col_offset)
        handled = self._processed_if_nodes

        for branch in self._iter_if_chain(node):
            handled.add(id(branch))
            extract_from_condition(branch.test, collector)

        self._emit_pattern_if_valid(collector)

    def _iter_if_chain(self, node: ast.If) -> "Iterator[ast.If]":
        """Walk an if/elif chain from its head to its last elif.

        Args:
            node: Starting If node

        Yields:
            The starting node followed by each elif branch in order
        """
        branch: ast.If | None = node
        while branch is not None:
            yield branch
            branch = self._get_next_elif(branch)

    def _get_next_elif(self, node: ast.If) -> ast.If | None:
        """Return the elif that directly follows an If node.

        An elif appears in the AST as an ``orelse`` list holding exactly one
        If node.

        Args:
            node: Current If node

        Returns:
            The following elif If node, or None when there is none
        """
        tail = node.orelse
        if len(tail) != 1:
            return None
        only = tail[0]
        return only if isinstance(only, ast.If) else None

    def _emit_pattern_if_valid(self, collector: _ChainCollector) -> None:
        """Store a pattern when the collector gathered enough string values.

        Args:
            collector: Collector holding the aggregated values of one chain
        """
        if len(collector.string_values) >= MIN_VALUES_FOR_PATTERN:
            self.patterns.append(
                EqualityChainPattern(
                    string_values=collector.string_values,
                    pattern_type="equality_chain",
                    line_number=collector.line_number,
                    column=collector.column,
                    variable_name=collector.variable_name,
                )
            )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Shared constants for stringly-typed Python detection
|
|
3
|
+
|
|
4
|
+
Scope: Common configuration values used across Python pattern detectors
|
|
5
|
+
|
|
6
|
+
Overview: Provides shared constants used by MembershipValidationDetector,
|
|
7
|
+
ConditionalPatternDetector, and other Python detection components.
|
|
8
|
+
Centralizes configuration values to ensure consistency and avoid
|
|
9
|
+
duplication across detector implementations.
|
|
10
|
+
|
|
11
|
+
Dependencies: None
|
|
12
|
+
|
|
13
|
+
Exports: MIN_VALUES_FOR_PATTERN constant
|
|
14
|
+
|
|
15
|
+
Interfaces: Constants only, no function interfaces
|
|
16
|
+
|
|
17
|
+
Implementation: Simple module-level constant definitions
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
# Minimum number of string values to consider as enum candidate.
# The detectors compare the size of their collected set of string values
# against this threshold and report nothing when the set is smaller.
MIN_VALUES_FOR_PATTERN = 2
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Analyze Python match statements for stringly-typed patterns
|
|
3
|
+
|
|
4
|
+
Scope: Extract string values from match statement cases
|
|
5
|
+
|
|
6
|
+
Overview: Provides the analyze_match_statement function, which analyzes Python 3.10+ match
|
|
7
|
+
statements to detect stringly-typed patterns. Extracts string values from
|
|
8
|
+
case patterns and returns structured results. Separated from main detector
|
|
9
|
+
to maintain single responsibility and reduce class complexity.
|
|
10
|
+
|
|
11
|
+
Dependencies: ast module, constants module, variable_extractor
|
|
12
|
+
|
|
13
|
+
Exports: analyze_match_statement function
|
|
14
|
+
|
|
15
|
+
Interfaces: analyze_match_statement(node, pattern_class) -> pattern_class instance | None
|
|
16
|
+
|
|
17
|
+
Implementation: AST pattern matching for MatchValue nodes with string constants
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import ast
|
|
21
|
+
|
|
22
|
+
from .constants import MIN_VALUES_FOR_PATTERN
|
|
23
|
+
from .variable_extractor import extract_variable_name
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def analyze_match_statement(
    node: ast.Match,
    pattern_class: type,
) -> object | None:
    """Build a pattern object for a match statement that switches on strings.

    Args:
        node: Match statement node to analyze
        pattern_class: Class instantiated for the result; it is called with
            the keyword arguments string_values, pattern_type, line_number,
            column and variable_name

    Returns:
        A pattern_class instance when the cases contain at least
        MIN_VALUES_FOR_PATTERN string values, None otherwise
    """
    literals = _collect_string_cases(node.cases)

    if len(literals) >= MIN_VALUES_FOR_PATTERN:
        return pattern_class(
            string_values=literals,
            pattern_type="match_statement",
            line_number=node.lineno,
            column=node.col_offset,
            variable_name=extract_variable_name(node.subject),
        )

    return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _collect_string_cases(cases: list[ast.match_case]) -> set[str]:
|
|
55
|
+
"""Collect string values from match cases.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
cases: List of match_case nodes
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
Set of string values from MatchValue patterns
|
|
62
|
+
"""
|
|
63
|
+
string_values: set[str] = set()
|
|
64
|
+
|
|
65
|
+
for case in cases:
|
|
66
|
+
value = _extract_case_string_value(case.pattern)
|
|
67
|
+
if value is not None:
|
|
68
|
+
string_values.add(value)
|
|
69
|
+
|
|
70
|
+
return string_values
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _extract_case_string_value(pattern: ast.pattern) -> str | None:
|
|
74
|
+
"""Extract string value from a case pattern.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
pattern: Match case pattern node
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
String value if pattern is a MatchValue with string, None otherwise
|
|
81
|
+
"""
|
|
82
|
+
if not isinstance(pattern, ast.MatchValue):
|
|
83
|
+
return None
|
|
84
|
+
if not isinstance(pattern.value, ast.Constant):
|
|
85
|
+
return None
|
|
86
|
+
if not isinstance(pattern.value.value, str):
|
|
87
|
+
return None
|
|
88
|
+
return pattern.value.value
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Detect membership validation patterns in Python AST
|
|
3
|
+
|
|
4
|
+
Scope: Find 'x in ("a", "b")' and 'x not in (...)' patterns
|
|
5
|
+
|
|
6
|
+
Overview: Provides MembershipValidationDetector class that traverses Python AST to find
|
|
7
|
+
membership validation patterns where strings are used instead of enums. Detects
|
|
8
|
+
Compare nodes with In/NotIn operators and string literal collections (tuple, set,
|
|
9
|
+
list). Returns structured MembershipPattern dataclass instances with string values,
|
|
10
|
+
operator type, location, and optional variable name. Filters out non-string
|
|
11
|
+
collections, single-element collections, and variable references.
|
|
12
|
+
|
|
13
|
+
Dependencies: ast module for AST parsing, dataclasses for pattern structure,
|
|
14
|
+
variable_extractor for variable name extraction
|
|
15
|
+
|
|
16
|
+
Exports: MembershipValidationDetector class, MembershipPattern dataclass
|
|
17
|
+
|
|
18
|
+
Interfaces: MembershipValidationDetector.find_patterns(tree) -> list[MembershipPattern]
|
|
19
|
+
|
|
20
|
+
Implementation: AST NodeVisitor pattern with Compare node handling for In/NotIn operators
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import ast
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
|
|
26
|
+
from .constants import MIN_VALUES_FOR_PATTERN
|
|
27
|
+
from .variable_extractor import extract_variable_name
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class MembershipPattern:
|
|
32
|
+
"""Represents a detected membership validation pattern.
|
|
33
|
+
|
|
34
|
+
Captures the essential information about a stringly-typed membership check
|
|
35
|
+
including the string values being compared, the operator used, source location,
|
|
36
|
+
and the variable being tested if identifiable.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
string_values: set[str]
|
|
40
|
+
"""Set of string values in the membership test."""
|
|
41
|
+
|
|
42
|
+
operator: str
|
|
43
|
+
"""Operator used: 'in' or 'not in'."""
|
|
44
|
+
|
|
45
|
+
line_number: int
|
|
46
|
+
"""Line number where the pattern occurs (1-indexed)."""
|
|
47
|
+
|
|
48
|
+
column: int
|
|
49
|
+
"""Column number where the pattern starts (0-indexed)."""
|
|
50
|
+
|
|
51
|
+
variable_name: str | None
|
|
52
|
+
"""Variable name being tested, if identifiable from a simple expression."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class MembershipValidationDetector(ast.NodeVisitor):
    """Detects membership validation patterns in Python AST.

    Finds patterns like 'x in ("a", "b")' and 'x not in {"c", "d"}' where
    strings are used for validation instead of proper enums.
    """

    def __init__(self) -> None:
        """Initialize the detector."""
        self.patterns: list[MembershipPattern] = []

    def find_patterns(self, tree: ast.AST) -> list[MembershipPattern]:
        """Find all membership validation patterns in the AST.

        Results from any previous run are discarded first.

        Args:
            tree: The AST to analyze

        Returns:
            List of MembershipPattern instances for each detected pattern
        """
        self.patterns = []
        self.visit(tree)
        return self.patterns

    def visit_Compare(self, node: ast.Compare) -> None:  # pylint: disable=invalid-name
        """Visit a Compare node to check for membership patterns.

        Every operator of the comparison is checked, so chained comparisons
        are covered as well. Child nodes are visited afterwards.

        Args:
            node: The Compare node to analyze
        """
        for op_index, operator in enumerate(node.ops):
            self._check_membership_operator(node, operator, op_index)
        self.generic_visit(node)

    def _check_membership_operator(
        self, node: ast.Compare, operator: ast.cmpop, op_index: int
    ) -> None:
        """Check if an operator forms a valid membership pattern.

        Args:
            node: The Compare node containing the operator
            operator: The comparison operator to check
            op_index: Index of the operator in the Compare node
        """
        if not isinstance(operator, (ast.In, ast.NotIn)):
            return

        comparator = node.comparators[op_index]
        string_values = _extract_string_values(comparator)

        if string_values is None or len(string_values) < MIN_VALUES_FOR_PATTERN:
            return

        # In a chained comparison such as `a == b in ("x", "y")`, the operand
        # tested by operator i is the previous comparator; only the first
        # operator has node.left on its left-hand side.
        subject = node.left if op_index == 0 else node.comparators[op_index - 1]
        self._add_pattern(node, operator, string_values, subject)

    def _add_pattern(
        self,
        node: ast.Compare,
        operator: ast.cmpop,
        string_values: set[str],
        subject: ast.expr | None = None,
    ) -> None:
        """Create and add a membership pattern to results.

        Args:
            node: The Compare node containing the pattern; supplies the
                reported line and column
            operator: The In or NotIn operator
            string_values: Set of string values detected
            subject: Expression on the left of the operator; defaults to
                node.left when omitted
        """
        operator_str = "in" if isinstance(operator, ast.In) else "not in"
        tested = node.left if subject is None else subject

        self.patterns.append(
            MembershipPattern(
                string_values=string_values,
                operator=operator_str,
                line_number=node.lineno,
                column=node.col_offset,
                variable_name=extract_variable_name(tested),
            )
        )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _extract_string_values(node: ast.AST) -> set[str] | None:
    """Return the strings of a tuple/set/list literal made only of strings.

    Args:
        node: AST node representing the collection

    Returns:
        Set of string values if the node is a non-empty collection literal
        whose elements are all string constants, None otherwise
    """
    elements = _get_collection_elements(node)
    if not elements:
        # Not a collection literal, or an empty one.
        return None

    return _collect_string_constants(elements)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _get_collection_elements(node: ast.AST) -> list[ast.expr] | None:
|
|
151
|
+
"""Get elements from a collection literal node.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
node: AST node that may be a collection literal
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
List of element nodes if node is a collection, None otherwise
|
|
158
|
+
"""
|
|
159
|
+
if isinstance(node, ast.Tuple):
|
|
160
|
+
return list(node.elts)
|
|
161
|
+
if isinstance(node, ast.Set):
|
|
162
|
+
return list(node.elts)
|
|
163
|
+
if isinstance(node, ast.List):
|
|
164
|
+
return list(node.elts)
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _collect_string_constants(elements: list[ast.expr]) -> set[str] | None:
|
|
169
|
+
"""Collect string constants from a list of AST expression nodes.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
elements: List of expression nodes from a collection
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
Set of string values if all elements are string constants, None otherwise
|
|
176
|
+
"""
|
|
177
|
+
string_values: set[str] = set()
|
|
178
|
+
|
|
179
|
+
for element in elements:
|
|
180
|
+
if not isinstance(element, ast.Constant):
|
|
181
|
+
return None
|
|
182
|
+
if not isinstance(element.value, str):
|
|
183
|
+
return None
|
|
184
|
+
string_values.add(element.value)
|
|
185
|
+
|
|
186
|
+
return string_values
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Extract variable names from Python AST nodes
|
|
3
|
+
|
|
4
|
+
Scope: AST node analysis for identifying variable names in expressions
|
|
5
|
+
|
|
6
|
+
Overview: Provides functions to extract variable names from various Python AST expression
|
|
7
|
+
types including simple names, attribute access chains, and method calls. Handles
|
|
8
|
+
complex expressions by returning None when the variable cannot be simply identified.
|
|
9
|
+
Supports extraction from Name nodes, Attribute chains (e.g., self.status), and Call
|
|
10
|
+
nodes for method calls (e.g., x.lower()).
|
|
11
|
+
|
|
12
|
+
Dependencies: ast module for AST node types
|
|
13
|
+
|
|
14
|
+
Exports: extract_variable_name, extract_attribute_chain functions
|
|
15
|
+
|
|
16
|
+
Interfaces: extract_variable_name(node) -> str | None for general extraction,
|
|
17
|
+
extract_attribute_chain(node) -> str for attribute chain extraction
|
|
18
|
+
|
|
19
|
+
Implementation: Pattern matching on AST node types with recursive chain handling
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import ast
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def extract_variable_name(node: ast.AST) -> str | None:
|
|
26
|
+
"""Extract variable name from an expression node.
|
|
27
|
+
|
|
28
|
+
Identifies the variable being used in an expression, handling
|
|
29
|
+
simple names, attribute access, and method calls.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
node: AST node representing an expression
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
Variable name if identifiable, None for complex expressions
|
|
36
|
+
"""
|
|
37
|
+
if isinstance(node, ast.Name):
|
|
38
|
+
return node.id
|
|
39
|
+
|
|
40
|
+
if isinstance(node, ast.Attribute):
|
|
41
|
+
return extract_attribute_chain(node)
|
|
42
|
+
|
|
43
|
+
if isinstance(node, ast.Call):
|
|
44
|
+
return _extract_call_variable(node)
|
|
45
|
+
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def extract_attribute_chain(node: ast.Attribute) -> str:
    """Render an attribute access as a dotted string.

    Produces e.g. 'self.status' or 'obj.attr.subattr'. When the innermost
    base of the chain is not a plain name (for instance a call or a
    subscript), that base is left out and only the attribute names are
    joined.

    Args:
        node: Attribute node to extract from

    Returns:
        Dotted string representation of the attribute chain
    """
    dotted = node.attr
    base = node.value

    # Walk inwards, prefixing each enclosing attribute name.
    while isinstance(base, ast.Attribute):
        dotted = f"{base.attr}.{dotted}"
        base = base.value

    if isinstance(base, ast.Name):
        dotted = f"{base.id}.{dotted}"

    return dotted
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _extract_call_variable(node: ast.Call) -> str | None:
|
|
76
|
+
"""Extract variable from a method call expression.
|
|
77
|
+
|
|
78
|
+
For expressions like x.lower(), returns 'x'.
|
|
79
|
+
For complex calls like get_value().lower(), returns None.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
node: Call node to extract from
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
Variable name if identifiable, None otherwise
|
|
86
|
+
"""
|
|
87
|
+
if not isinstance(node.func, ast.Attribute):
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
value = node.func.value
|
|
91
|
+
if isinstance(value, ast.Name):
|
|
92
|
+
return value.id
|
|
93
|
+
if isinstance(value, ast.Attribute):
|
|
94
|
+
return extract_attribute_chain(value)
|
|
95
|
+
|
|
96
|
+
return None
|