thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +38 -25
  23. src/core/base.py +7 -2
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +5 -2
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +120 -20
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +104 -10
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +54 -11
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +5 -4
  73. src/linters/dry/file_analyzer.py +4 -2
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +183 -48
  76. src/linters/dry/python_analyzer.py +60 -439
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/token_hasher.py +116 -112
  80. src/linters/dry/typescript_analyzer.py +68 -382
  81. src/linters/dry/typescript_constant_extractor.py +138 -0
  82. src/linters/dry/typescript_statement_detector.py +255 -0
  83. src/linters/dry/typescript_value_extractor.py +70 -0
  84. src/linters/dry/violation_builder.py +4 -0
  85. src/linters/dry/violation_filter.py +5 -4
  86. src/linters/dry/violation_generator.py +71 -14
  87. src/linters/file_header/atemporal_detector.py +68 -50
  88. src/linters/file_header/base_parser.py +93 -0
  89. src/linters/file_header/bash_parser.py +66 -0
  90. src/linters/file_header/config.py +90 -16
  91. src/linters/file_header/css_parser.py +70 -0
  92. src/linters/file_header/field_validator.py +36 -33
  93. src/linters/file_header/linter.py +140 -144
  94. src/linters/file_header/markdown_parser.py +130 -0
  95. src/linters/file_header/python_parser.py +14 -58
  96. src/linters/file_header/typescript_parser.py +73 -0
  97. src/linters/file_header/violation_builder.py +13 -12
  98. src/linters/file_placement/config_loader.py +3 -1
  99. src/linters/file_placement/directory_matcher.py +4 -0
  100. src/linters/file_placement/linter.py +66 -34
  101. src/linters/file_placement/pattern_matcher.py +41 -6
  102. src/linters/file_placement/pattern_validator.py +31 -12
  103. src/linters/file_placement/rule_checker.py +12 -7
  104. src/linters/lazy_ignores/__init__.py +43 -0
  105. src/linters/lazy_ignores/config.py +74 -0
  106. src/linters/lazy_ignores/directive_utils.py +164 -0
  107. src/linters/lazy_ignores/header_parser.py +177 -0
  108. src/linters/lazy_ignores/linter.py +158 -0
  109. src/linters/lazy_ignores/matcher.py +168 -0
  110. src/linters/lazy_ignores/python_analyzer.py +209 -0
  111. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  112. src/linters/lazy_ignores/skip_detector.py +298 -0
  113. src/linters/lazy_ignores/types.py +71 -0
  114. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  115. src/linters/lazy_ignores/violation_builder.py +135 -0
  116. src/linters/lbyl/__init__.py +31 -0
  117. src/linters/lbyl/config.py +63 -0
  118. src/linters/lbyl/linter.py +67 -0
  119. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  120. src/linters/lbyl/pattern_detectors/base.py +63 -0
  121. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  122. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  123. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  124. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  125. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  126. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  127. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  128. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  129. src/linters/lbyl/python_analyzer.py +215 -0
  130. src/linters/lbyl/violation_builder.py +354 -0
  131. src/linters/magic_numbers/context_analyzer.py +227 -225
  132. src/linters/magic_numbers/linter.py +28 -82
  133. src/linters/magic_numbers/python_analyzer.py +4 -16
  134. src/linters/magic_numbers/typescript_analyzer.py +9 -12
  135. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  136. src/linters/method_property/__init__.py +49 -0
  137. src/linters/method_property/config.py +138 -0
  138. src/linters/method_property/linter.py +414 -0
  139. src/linters/method_property/python_analyzer.py +473 -0
  140. src/linters/method_property/violation_builder.py +119 -0
  141. src/linters/nesting/linter.py +24 -16
  142. src/linters/nesting/python_analyzer.py +4 -0
  143. src/linters/nesting/typescript_analyzer.py +6 -12
  144. src/linters/nesting/violation_builder.py +1 -0
  145. src/linters/performance/__init__.py +91 -0
  146. src/linters/performance/config.py +43 -0
  147. src/linters/performance/constants.py +49 -0
  148. src/linters/performance/linter.py +149 -0
  149. src/linters/performance/python_analyzer.py +365 -0
  150. src/linters/performance/regex_analyzer.py +312 -0
  151. src/linters/performance/regex_linter.py +139 -0
  152. src/linters/performance/typescript_analyzer.py +236 -0
  153. src/linters/performance/violation_builder.py +160 -0
  154. src/linters/print_statements/config.py +7 -12
  155. src/linters/print_statements/linter.py +26 -43
  156. src/linters/print_statements/python_analyzer.py +91 -93
  157. src/linters/print_statements/typescript_analyzer.py +15 -25
  158. src/linters/print_statements/violation_builder.py +12 -14
  159. src/linters/srp/class_analyzer.py +11 -7
  160. src/linters/srp/heuristics.py +56 -22
  161. src/linters/srp/linter.py +15 -16
  162. src/linters/srp/python_analyzer.py +55 -20
  163. src/linters/srp/typescript_metrics_calculator.py +110 -50
  164. src/linters/stateless_class/__init__.py +25 -0
  165. src/linters/stateless_class/config.py +58 -0
  166. src/linters/stateless_class/linter.py +349 -0
  167. src/linters/stateless_class/python_analyzer.py +290 -0
  168. src/linters/stringly_typed/__init__.py +36 -0
  169. src/linters/stringly_typed/config.py +189 -0
  170. src/linters/stringly_typed/context_filter.py +451 -0
  171. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  172. src/linters/stringly_typed/ignore_checker.py +100 -0
  173. src/linters/stringly_typed/ignore_utils.py +51 -0
  174. src/linters/stringly_typed/linter.py +376 -0
  175. src/linters/stringly_typed/python/__init__.py +33 -0
  176. src/linters/stringly_typed/python/analyzer.py +348 -0
  177. src/linters/stringly_typed/python/call_tracker.py +175 -0
  178. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  179. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  180. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  181. src/linters/stringly_typed/python/constants.py +21 -0
  182. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  183. src/linters/stringly_typed/python/validation_detector.py +189 -0
  184. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  185. src/linters/stringly_typed/storage.py +620 -0
  186. src/linters/stringly_typed/storage_initializer.py +45 -0
  187. src/linters/stringly_typed/typescript/__init__.py +28 -0
  188. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  189. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  190. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  191. src/linters/stringly_typed/violation_generator.py +419 -0
  192. src/orchestrator/core.py +252 -14
  193. src/orchestrator/language_detector.py +5 -3
  194. src/templates/thailint_config_template.yaml +196 -0
  195. src/utils/project_root.py +3 -0
  196. thailint-0.15.3.dist-info/METADATA +187 -0
  197. thailint-0.15.3.dist-info/RECORD +226 -0
  198. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  199. src/cli.py +0 -1665
  200. thailint-0.5.0.dist-info/METADATA +0 -1286
  201. thailint-0.5.0.dist-info/RECORD +0 -96
  202. thailint-0.5.0.dist-info/entry_points.txt +0 -4
  203. {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
  204. {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,365 @@
1
+ """
2
+ Purpose: Python AST-based string concatenation in loop detector
3
+
4
+ Scope: Detect O(n²) string building patterns using += in for/while loops
5
+
6
+ Overview: Analyzes Python code to detect string concatenation inside loops using AST traversal.
7
+ Implements heuristic-based detection using variable naming patterns and initialization values.
8
+ Detects `result += str(item)` patterns inside for/while loops that indicate O(n²) complexity.
9
+ Provides suggestions for using join() or list comprehension instead.
10
+
11
+ Dependencies: ast module for Python parsing, constants module for shared patterns
12
+
13
+ Exports: PythonStringConcatAnalyzer class with find_violations method
14
+
15
+ Interfaces: find_violations(tree: ast.AST) -> list[StringConcatViolation] with violation info
16
+
17
+ Implementation: AST visitor pattern detecting augmented assignments in loop contexts
18
+
19
+ Suppressions:
20
+ - srp.violation: Class uses many small methods to achieve A-grade cyclomatic complexity.
21
+ This is an intentional tradeoff - low complexity is prioritized over strict SRP adherence.
22
+ """
23
+
24
+ import ast
25
+ from dataclasses import dataclass
26
+
27
+ from .constants import STRING_VARIABLE_PATTERNS
28
+
29
+
30
+ @dataclass
31
+ class StringConcatViolation:
32
+ """Represents a string concatenation violation found in code."""
33
+
34
+ variable_name: str
35
+ line_number: int
36
+ column: int
37
+ loop_type: str # 'for' or 'while'
38
+
39
+
40
+ # thailint: ignore-next-line[srp.violation] Uses small focused methods to reduce complexity
41
+ class PythonStringConcatAnalyzer:
42
+ """Detects string concatenation in loops for Python code."""
43
+
44
+ def __init__(self) -> None:
45
+ """Initialize the analyzer."""
46
+ self._string_variables: set[str] = set()
47
+ self._non_string_variables: set[str] = set() # Lists, numbers, etc.
48
+
49
+ def find_violations(self, tree: ast.AST) -> list[StringConcatViolation]:
50
+ """Find all string concatenation in loop violations.
51
+
52
+ Args:
53
+ tree: Python AST to analyze
54
+
55
+ Returns:
56
+ List of violations found
57
+ """
58
+ violations: list[StringConcatViolation] = []
59
+ self._string_variables = set()
60
+ self._non_string_variables = set()
61
+
62
+ # First pass: identify variables initialized as strings or non-strings
63
+ self._identify_string_variables(tree)
64
+
65
+ # Second pass: find += in loops
66
+ self._find_concat_in_loops(tree, violations)
67
+
68
+ return violations
69
+
70
+ def _identify_string_variables(self, tree: ast.AST) -> None:
71
+ """Identify variables that are initialized as strings or non-strings.
72
+
73
+ Args:
74
+ tree: AST to analyze
75
+ """
76
+ for node in ast.walk(tree):
77
+ self._process_assignment_node(node)
78
+
79
+ def _process_assignment_node(self, node: ast.AST) -> None:
80
+ """Process a single assignment node to track variable types."""
81
+ if isinstance(node, ast.Assign):
82
+ self._process_simple_assign(node)
83
+ elif isinstance(node, ast.AnnAssign):
84
+ self._process_annotated_assign(node)
85
+
86
+ def _process_simple_assign(self, node: ast.Assign) -> None:
87
+ """Process a simple assignment node."""
88
+ for target in node.targets:
89
+ if isinstance(target, ast.Name):
90
+ self._classify_variable(target.id, node.value)
91
+
92
+ def _process_annotated_assign(self, node: ast.AnnAssign) -> None:
93
+ """Process an annotated assignment node."""
94
+ if node.value and isinstance(node.target, ast.Name):
95
+ self._classify_variable(node.target.id, node.value)
96
+
97
+ def _classify_variable(self, var_name: str, value: ast.expr) -> None:
98
+ """Classify a variable as string or non-string based on its value."""
99
+ if self._is_string_value(value):
100
+ self._string_variables.add(var_name)
101
+ elif self._is_non_string_value(value):
102
+ self._non_string_variables.add(var_name)
103
+
104
+ def _is_string_value(self, node: ast.expr) -> bool:
105
+ """Check if an expression is a string value.
106
+
107
+ Args:
108
+ node: Expression node to check
109
+
110
+ Returns:
111
+ True if the expression is a string literal or f-string
112
+ """
113
+ # String literal: "", '', """..."""
114
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
115
+ return True
116
+
117
+ # f-string: f"..."
118
+ if isinstance(node, ast.JoinedStr):
119
+ return True
120
+
121
+ return False
122
+
123
+ def _is_non_string_value(self, node: ast.expr) -> bool:
124
+ """Check if an expression is clearly not a string (list, number, etc).
125
+
126
+ Args:
127
+ node: Expression node to check
128
+
129
+ Returns:
130
+ True if the expression is clearly not a string
131
+ """
132
+ # Collection literals: [], {}, set()
133
+ if isinstance(node, (ast.List, ast.Dict, ast.Set)):
134
+ return True
135
+ # Numeric literal: 0, 1.0
136
+ return isinstance(node, ast.Constant) and isinstance(node.value, (int, float))
137
+
138
+ def _find_concat_in_loops(
139
+ self,
140
+ node: ast.AST,
141
+ violations: list[StringConcatViolation],
142
+ in_loop: str | None = None,
143
+ reset_vars: set[str] | None = None,
144
+ ) -> None:
145
+ """Recursively find string concatenation in loops.
146
+
147
+ Args:
148
+ node: Current AST node
149
+ violations: List to append violations to
150
+ in_loop: Type of enclosing loop ('for' or 'while'), None if not in loop
151
+ reset_vars: Variables reset to string values in current loop body
152
+ """
153
+ if reset_vars is None:
154
+ reset_vars = set()
155
+
156
+ # When entering a new loop, find variables reset in its body
157
+ loop_type = self._get_loop_type(node)
158
+ current_loop: str | None
159
+ current_reset_vars: set[str]
160
+ if loop_type:
161
+ # Find variables assigned to strings in this loop's body
162
+ loop_reset_vars = self._find_vars_reset_in_loop(node)
163
+ current_loop = loop_type
164
+ current_reset_vars = loop_reset_vars
165
+ else:
166
+ current_loop = in_loop
167
+ current_reset_vars = reset_vars
168
+
169
+ self._check_for_string_concat(node, violations, current_loop, current_reset_vars)
170
+
171
+ for child in ast.iter_child_nodes(node):
172
+ self._find_concat_in_loops(child, violations, current_loop, current_reset_vars)
173
+
174
+ def _get_loop_type(self, node: ast.AST) -> str | None:
175
+ """Get the loop type if node is a loop, else None."""
176
+ if isinstance(node, ast.For):
177
+ return "for"
178
+ if isinstance(node, ast.While):
179
+ return "while"
180
+ return None
181
+
182
+ def _find_vars_reset_in_loop(self, loop_node: ast.AST) -> set[str]:
183
+ """Find variables that are assigned to string values in a loop body.
184
+
185
+ These variables are "reset" each iteration and should not be flagged
186
+ for O(n²) string concatenation since they don't accumulate across iterations.
187
+
188
+ Args:
189
+ loop_node: A For or While loop AST node
190
+
191
+ Returns:
192
+ Set of variable names that are reset to strings in the loop body
193
+ """
194
+ reset_vars: set[str] = set()
195
+
196
+ # Get the loop body
197
+ if isinstance(loop_node, (ast.For, ast.While)):
198
+ body = loop_node.body
199
+ else:
200
+ return reset_vars
201
+
202
+ # Scan assignments in the loop body (not nested loops)
203
+ for stmt in body:
204
+ self._collect_string_assigns(stmt, reset_vars)
205
+
206
+ return reset_vars
207
+
208
+ def _collect_string_assigns(self, node: ast.AST, reset_vars: set[str]) -> None:
209
+ """Collect variable names assigned to string values in a node.
210
+
211
+ Args:
212
+ node: AST node to scan
213
+ reset_vars: Set to add found variable names to
214
+ """
215
+ self._check_simple_assign(node, reset_vars)
216
+ self._check_annotated_assign(node, reset_vars)
217
+ self._recurse_control_flow(node, reset_vars)
218
+
219
+ def _check_simple_assign(self, node: ast.AST, reset_vars: set[str]) -> None:
220
+ """Check if node is a simple assignment to a string value."""
221
+ if not isinstance(node, ast.Assign):
222
+ return
223
+ for target in node.targets:
224
+ if isinstance(target, ast.Name) and self._is_string_value(node.value):
225
+ reset_vars.add(target.id)
226
+
227
+ def _check_annotated_assign(self, node: ast.AST, reset_vars: set[str]) -> None:
228
+ """Check if node is an annotated assignment to a string value."""
229
+ if not isinstance(node, ast.AnnAssign):
230
+ return
231
+ if node.value and isinstance(node.target, ast.Name) and self._is_string_value(node.value):
232
+ reset_vars.add(node.target.id)
233
+
234
+ def _recurse_control_flow(self, node: ast.AST, reset_vars: set[str]) -> None:
235
+ """Recurse into control flow (if/else, try/except) but NOT into nested loops."""
236
+ if isinstance(node, ast.If):
237
+ self._recurse_if_node(node, reset_vars)
238
+ elif isinstance(node, ast.Try):
239
+ self._recurse_try_node(node, reset_vars)
240
+
241
+ def _recurse_if_node(self, node: ast.If, reset_vars: set[str]) -> None:
242
+ """Recurse into if/else branches."""
243
+ for stmt in node.body + node.orelse:
244
+ self._collect_string_assigns(stmt, reset_vars)
245
+
246
+ def _recurse_try_node(self, node: ast.Try, reset_vars: set[str]) -> None:
247
+ """Recurse into try/except/finally branches."""
248
+ for stmt in node.body + node.orelse + node.finalbody:
249
+ self._collect_string_assigns(stmt, reset_vars)
250
+ for handler in node.handlers:
251
+ for stmt in handler.body:
252
+ self._collect_string_assigns(stmt, reset_vars)
253
+
254
+ def _check_for_string_concat(
255
+ self,
256
+ node: ast.AST,
257
+ violations: list[StringConcatViolation],
258
+ loop_type: str | None,
259
+ reset_vars: set[str] | None = None,
260
+ ) -> None:
261
+ """Check if node is a string concatenation in a loop and add violation if so."""
262
+ if not self._is_add_aug_assign_in_loop(node, loop_type):
263
+ return
264
+ self._process_aug_assign(node, violations, loop_type or "", reset_vars)
265
+
266
+ def _process_aug_assign(
267
+ self,
268
+ node: ast.AST,
269
+ violations: list[StringConcatViolation],
270
+ loop_type: str,
271
+ reset_vars: set[str] | None,
272
+ ) -> None:
273
+ """Process an augmented assignment node for potential violations."""
274
+ if not isinstance(node, ast.AugAssign) or not isinstance(node.target, ast.Name):
275
+ return
276
+ var_name = node.target.id
277
+ if self._should_skip_reset_var(var_name, reset_vars):
278
+ return
279
+ self._add_string_concat_violation(node, var_name, loop_type, violations)
280
+
281
+ def _should_skip_reset_var(self, var_name: str, reset_vars: set[str] | None) -> bool:
282
+ """Check if variable is reset in the loop and should be skipped."""
283
+ return reset_vars is not None and var_name in reset_vars
284
+
285
+ def _is_add_aug_assign_in_loop(self, node: ast.AST, loop_type: str | None) -> bool:
286
+ """Check if node is a += augmented assignment in a loop."""
287
+ if not loop_type or not isinstance(node, ast.AugAssign):
288
+ return False
289
+ return isinstance(node.op, ast.Add) and isinstance(node.target, ast.Name)
290
+
291
+ def _add_string_concat_violation(
292
+ self,
293
+ node: ast.AugAssign,
294
+ var_name: str,
295
+ loop_type: str,
296
+ violations: list[StringConcatViolation],
297
+ ) -> None:
298
+ """Add violation if variable is likely a string."""
299
+ if not self._is_likely_string_variable(var_name, node.value):
300
+ return
301
+ violations.append(
302
+ StringConcatViolation(
303
+ variable_name=var_name,
304
+ line_number=node.lineno,
305
+ column=node.col_offset,
306
+ loop_type=loop_type,
307
+ )
308
+ )
309
+
310
+ def _is_likely_string_variable(self, var_name: str, value: ast.expr) -> bool:
311
+ """Determine if a variable is likely a string being concatenated.
312
+
313
+ Args:
314
+ var_name: Variable name
315
+ value: Value being added
316
+
317
+ Returns:
318
+ True if this is likely string concatenation
319
+ """
320
+ if var_name in self._non_string_variables:
321
+ return False
322
+ return (
323
+ self._is_known_string_var(var_name)
324
+ or self._is_string_value(value)
325
+ or self._is_str_call(value)
326
+ or self._is_string_binop(value)
327
+ )
328
+
329
+ def _is_known_string_var(self, var_name: str) -> bool:
330
+ """Check if variable is known or named like a string."""
331
+ return var_name in self._string_variables or var_name.lower() in STRING_VARIABLE_PATTERNS
332
+
333
+ def _is_str_call(self, value: ast.expr) -> bool:
334
+ """Check if value is a str() call."""
335
+ if not isinstance(value, ast.Call):
336
+ return False
337
+ return isinstance(value.func, ast.Name) and value.func.id == "str"
338
+
339
+ def _is_string_binop(self, value: ast.expr) -> bool:
340
+ """Check if value is a binary op with string operand."""
341
+ if not isinstance(value, ast.BinOp) or not isinstance(value.op, ast.Add):
342
+ return False
343
+ return self._is_string_value(value.left) or self._is_string_value(value.right)
344
+
345
+ def deduplicate_violations(
346
+ self, violations: list[StringConcatViolation]
347
+ ) -> list[StringConcatViolation]:
348
+ """Deduplicate violations to report one per loop, not per +=.
349
+
350
+ Args:
351
+ violations: List of all violations found
352
+
353
+ Returns:
354
+ Deduplicated list with one violation per variable per loop
355
+ """
356
+ # Group by variable name and keep first occurrence
357
+ seen: set[str] = set()
358
+ result: list[StringConcatViolation] = []
359
+
360
+ for v in violations:
361
+ if v.variable_name not in seen:
362
+ seen.add(v.variable_name)
363
+ result.append(v)
364
+
365
+ return result
@@ -0,0 +1,312 @@
1
+ """
2
+ Purpose: Python AST-based regex compilation in loop detector
3
+
4
+ Scope: Detect repeated regex compilation patterns using re.method() in for/while loops
5
+
6
+ Overview: Analyzes Python code to detect regex function calls inside loops using AST traversal.
7
+ Detects calls to re.match(), re.search(), re.sub(), re.subn(), re.findall(), re.finditer(),
8
+ re.split(), and re.fullmatch() inside for/while loops. Tracks variables assigned from re.compile() to
9
+ avoid false positives when compiled patterns are correctly used. Supports import
10
+ variations including 'import re', 'from re import match', and 'import re as alias'.
11
+
12
+ Dependencies: ast module for Python parsing
13
+
14
+ Exports: PythonRegexInLoopAnalyzer class with find_violations method
15
+
16
+ Interfaces: find_violations(tree: ast.AST) -> list[RegexInLoopViolation]
17
+
18
+ Implementation: AST visitor pattern detecting regex calls in loop contexts with compiled
19
+ pattern tracking
20
+
21
+ Suppressions:
22
+ - srp.violation: Class uses many small methods to achieve A-grade cyclomatic complexity.
23
+ This is an intentional tradeoff - low complexity is prioritized over strict SRP adherence.
24
+ """
25
+
26
+ import ast
27
+ from dataclasses import dataclass
28
+
29
+ # Regex module functions that compile patterns on each call
30
+ RE_FUNCTIONS = frozenset(
31
+ {
32
+ "match",
33
+ "search",
34
+ "sub",
35
+ "subn",
36
+ "findall",
37
+ "finditer",
38
+ "split",
39
+ "fullmatch",
40
+ }
41
+ )
42
+
43
+
44
+ @dataclass
45
+ class RegexInLoopViolation:
46
+ """Represents a regex-in-loop violation found in code."""
47
+
48
+ method_name: str
49
+ line_number: int
50
+ column: int
51
+ loop_type: str # 'for' or 'while'
52
+
53
+
54
+ # thailint: ignore-next-line[srp.violation] Uses small focused methods to reduce complexity
55
+ class PythonRegexInLoopAnalyzer:
56
+ """Detects regex function calls in loops for Python code."""
57
+
58
+ def __init__(self) -> None:
59
+ """Initialize the analyzer."""
60
+ self._compiled_patterns: set[str] = set()
61
+ self._re_aliases: set[str] = set() # Module aliases like 'regex' from 'import re as regex'
62
+ self._direct_imports: set[str] = set() # Direct imports like 'match' from 'from re import'
63
+
64
+ def find_violations(self, tree: ast.AST) -> list[RegexInLoopViolation]:
65
+ """Find all regex-in-loop violations.
66
+
67
+ Args:
68
+ tree: Python AST to analyze
69
+
70
+ Returns:
71
+ List of violations found
72
+ """
73
+ violations: list[RegexInLoopViolation] = []
74
+ self._compiled_patterns = set()
75
+ self._re_aliases = {"re"} # Default 're' is always valid
76
+ self._direct_imports = set()
77
+
78
+ # First pass: identify imports and compiled patterns
79
+ self._identify_imports(tree)
80
+ self._identify_compiled_patterns(tree)
81
+
82
+ # Second pass: find regex calls in loops
83
+ self._find_regex_in_loops(tree, violations)
84
+
85
+ return violations
86
+
87
+ def _identify_imports(self, tree: ast.AST) -> None:
88
+ """Identify re module imports and aliases.
89
+
90
+ Args:
91
+ tree: AST to analyze
92
+ """
93
+ for node in ast.walk(tree):
94
+ self._process_import_node(node)
95
+
96
+ def _process_import_node(self, node: ast.AST) -> None:
97
+ """Process a single import node."""
98
+ if isinstance(node, ast.Import):
99
+ self._process_regular_import(node)
100
+ elif isinstance(node, ast.ImportFrom):
101
+ self._process_from_import(node)
102
+
103
+ def _process_regular_import(self, node: ast.Import) -> None:
104
+ """Process 'import re' or 'import re as regex' style imports."""
105
+ for alias in node.names:
106
+ if alias.name == "re":
107
+ # import re as regex -> add 'regex' as valid alias
108
+ self._re_aliases.add(alias.asname or "re")
109
+
110
+ def _process_from_import(self, node: ast.ImportFrom) -> None:
111
+ """Process 'from re import match' style imports."""
112
+ if node.module != "re":
113
+ return
114
+ for alias in node.names:
115
+ self._add_direct_import_if_re_function(alias)
116
+
117
+ def _add_direct_import_if_re_function(self, alias: ast.alias) -> None:
118
+ """Add alias to direct imports if it's a regex function."""
119
+ if alias.name not in RE_FUNCTIONS:
120
+ return
121
+ imported_name = alias.asname or alias.name
122
+ self._direct_imports.add(imported_name)
123
+
124
+ def _identify_compiled_patterns(self, tree: ast.AST) -> None:
125
+ """Identify variables assigned from re.compile().
126
+
127
+ Args:
128
+ tree: AST to analyze
129
+ """
130
+ for node in ast.walk(tree):
131
+ self._check_for_compile_assignment(node)
132
+
133
+ def _check_for_compile_assignment(self, node: ast.AST) -> None:
134
+ """Check if node is an assignment from re.compile()."""
135
+ if isinstance(node, ast.Assign):
136
+ self._process_compile_assign(node)
137
+ elif isinstance(node, ast.AnnAssign):
138
+ self._process_compile_ann_assign(node)
139
+
140
+ def _process_compile_assign(self, node: ast.Assign) -> None:
141
+ """Process simple assignment for re.compile()."""
142
+ if not self._is_re_compile_call(node.value):
143
+ return
144
+ for target in node.targets:
145
+ self._add_compiled_pattern_if_name(target)
146
+
147
+ def _add_compiled_pattern_if_name(self, target: ast.expr) -> None:
148
+ """Add target to compiled patterns if it's a Name node."""
149
+ if isinstance(target, ast.Name):
150
+ self._compiled_patterns.add(target.id)
151
+
152
+ def _process_compile_ann_assign(self, node: ast.AnnAssign) -> None:
153
+ """Process annotated assignment for re.compile()."""
154
+ if node.value and self._is_re_compile_call(node.value):
155
+ if isinstance(node.target, ast.Name):
156
+ self._compiled_patterns.add(node.target.id)
157
+
158
+ def _is_re_compile_call(self, node: ast.expr) -> bool:
159
+ """Check if expression is a call to re.compile().
160
+
161
+ Args:
162
+ node: Expression node to check
163
+
164
+ Returns:
165
+ True if this is a re.compile() call
166
+ """
167
+ if not isinstance(node, ast.Call):
168
+ return False
169
+
170
+ func = node.func
171
+
172
+ # re.compile() style
173
+ if isinstance(func, ast.Attribute):
174
+ return self._is_module_compile(func)
175
+
176
+ return False
177
+
178
+ def _is_module_compile(self, func: ast.Attribute) -> bool:
179
+ """Check if attribute access is module.compile()."""
180
+ if func.attr != "compile":
181
+ return False
182
+ if isinstance(func.value, ast.Name):
183
+ return func.value.id in self._re_aliases
184
+ return False
185
+
186
+ def _find_regex_in_loops(
187
+ self,
188
+ node: ast.AST,
189
+ violations: list[RegexInLoopViolation],
190
+ in_loop: str | None = None,
191
+ ) -> None:
192
+ """Recursively find regex calls in loops.
193
+
194
+ Args:
195
+ node: Current AST node
196
+ violations: List to append violations to
197
+ in_loop: Type of enclosing loop ('for' or 'while'), None if not in loop
198
+ """
199
+ current_loop = self._get_loop_type(node) or in_loop
200
+ self._check_for_regex_call(node, violations, current_loop)
201
+
202
+ for child in ast.iter_child_nodes(node):
203
+ self._find_regex_in_loops(child, violations, current_loop)
204
+
205
+ def _get_loop_type(self, node: ast.AST) -> str | None:
206
+ """Get the loop type if node is a loop, else None."""
207
+ if isinstance(node, ast.For):
208
+ return "for"
209
+ if isinstance(node, ast.While):
210
+ return "while"
211
+ return None
212
+
213
+ def _check_for_regex_call(
214
+ self,
215
+ node: ast.AST,
216
+ violations: list[RegexInLoopViolation],
217
+ loop_type: str | None,
218
+ ) -> None:
219
+ """Check if node is a regex call in a loop and add violation if so."""
220
+ if not loop_type or not isinstance(node, ast.Call):
221
+ return
222
+
223
+ violation = self._create_violation_if_regex_call(node, loop_type)
224
+ if violation:
225
+ violations.append(violation)
226
+
227
+ def _create_violation_if_regex_call(
228
+ self,
229
+ node: ast.Call,
230
+ loop_type: str,
231
+ ) -> RegexInLoopViolation | None:
232
+ """Create violation if this is an uncompiled regex call.
233
+
234
+ Args:
235
+ node: Call node
236
+ loop_type: Type of enclosing loop
237
+
238
+ Returns:
239
+ Violation if uncompiled regex call, None otherwise
240
+ """
241
+ method_name = self._get_regex_method_name(node)
242
+ if method_name:
243
+ return RegexInLoopViolation(
244
+ method_name=method_name,
245
+ line_number=node.lineno,
246
+ column=node.col_offset,
247
+ loop_type=loop_type,
248
+ )
249
+ return None
250
+
251
+ def _get_regex_method_name(self, node: ast.Call) -> str | None:
252
+ """Get regex method name if this is an uncompiled regex call.
253
+
254
+ Args:
255
+ node: Call node to check
256
+
257
+ Returns:
258
+ Method name (e.g., 'match', 'search') if uncompiled regex call, None otherwise
259
+ """
260
+ func = node.func
261
+
262
+ # re.match() style or regex.match() style
263
+ if isinstance(func, ast.Attribute):
264
+ return self._check_module_regex_call(func)
265
+
266
+ # Direct import: match() from 'from re import match'
267
+ if isinstance(func, ast.Name):
268
+ return self._check_direct_import_call(func)
269
+
270
+ return None
271
+
272
+ def _check_module_regex_call(self, func: ast.Attribute) -> str | None:
273
+ """Check if this is re.method() style call.
274
+
275
+ Args:
276
+ func: Attribute node (e.g., re.match)
277
+
278
+ Returns:
279
+ Method name if uncompiled regex call, None otherwise
280
+ """
281
+ method = func.attr
282
+
283
+ # Not a regex function we care about
284
+ if method not in RE_FUNCTIONS:
285
+ return None
286
+
287
+ # Check if it's called on a compiled pattern variable
288
+ if isinstance(func.value, ast.Name):
289
+ caller = func.value.id
290
+
291
+ # Called on compiled pattern: pattern.match() -> OK
292
+ if caller in self._compiled_patterns:
293
+ return None
294
+
295
+ # Called on re module or alias: re.match() -> Violation
296
+ if caller in self._re_aliases:
297
+ return f"re.{method}"
298
+
299
+ return None
300
+
301
+ def _check_direct_import_call(self, func: ast.Name) -> str | None:
302
+ """Check if this is a directly imported regex function call.
303
+
304
+ Args:
305
+ func: Name node (e.g., match)
306
+
307
+ Returns:
308
+ Method name if directly imported regex function, None otherwise
309
+ """
310
+ if func.id in self._direct_imports:
311
+ return func.id
312
+ return None