thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +44 -27
  23. src/core/base.py +95 -5
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +36 -6
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +125 -22
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +142 -94
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +68 -21
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +20 -82
  73. src/linters/dry/file_analyzer.py +15 -50
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +182 -54
  76. src/linters/dry/python_analyzer.py +108 -336
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/storage_initializer.py +9 -18
  80. src/linters/dry/token_hasher.py +129 -71
  81. src/linters/dry/typescript_analyzer.py +68 -380
  82. src/linters/dry/typescript_constant_extractor.py +138 -0
  83. src/linters/dry/typescript_statement_detector.py +255 -0
  84. src/linters/dry/typescript_value_extractor.py +70 -0
  85. src/linters/dry/violation_builder.py +4 -0
  86. src/linters/dry/violation_filter.py +9 -5
  87. src/linters/dry/violation_generator.py +71 -14
  88. src/linters/file_header/__init__.py +24 -0
  89. src/linters/file_header/atemporal_detector.py +105 -0
  90. src/linters/file_header/base_parser.py +93 -0
  91. src/linters/file_header/bash_parser.py +66 -0
  92. src/linters/file_header/config.py +140 -0
  93. src/linters/file_header/css_parser.py +70 -0
  94. src/linters/file_header/field_validator.py +72 -0
  95. src/linters/file_header/linter.py +309 -0
  96. src/linters/file_header/markdown_parser.py +130 -0
  97. src/linters/file_header/python_parser.py +42 -0
  98. src/linters/file_header/typescript_parser.py +73 -0
  99. src/linters/file_header/violation_builder.py +79 -0
  100. src/linters/file_placement/config_loader.py +3 -1
  101. src/linters/file_placement/directory_matcher.py +4 -0
  102. src/linters/file_placement/linter.py +74 -31
  103. src/linters/file_placement/pattern_matcher.py +41 -6
  104. src/linters/file_placement/pattern_validator.py +31 -12
  105. src/linters/file_placement/rule_checker.py +12 -7
  106. src/linters/lazy_ignores/__init__.py +43 -0
  107. src/linters/lazy_ignores/config.py +74 -0
  108. src/linters/lazy_ignores/directive_utils.py +164 -0
  109. src/linters/lazy_ignores/header_parser.py +177 -0
  110. src/linters/lazy_ignores/linter.py +158 -0
  111. src/linters/lazy_ignores/matcher.py +168 -0
  112. src/linters/lazy_ignores/python_analyzer.py +209 -0
  113. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  114. src/linters/lazy_ignores/skip_detector.py +298 -0
  115. src/linters/lazy_ignores/types.py +71 -0
  116. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  117. src/linters/lazy_ignores/violation_builder.py +135 -0
  118. src/linters/lbyl/__init__.py +31 -0
  119. src/linters/lbyl/config.py +63 -0
  120. src/linters/lbyl/linter.py +67 -0
  121. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  122. src/linters/lbyl/pattern_detectors/base.py +63 -0
  123. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  124. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  125. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  126. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  127. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  128. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  129. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  130. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  131. src/linters/lbyl/python_analyzer.py +215 -0
  132. src/linters/lbyl/violation_builder.py +354 -0
  133. src/linters/magic_numbers/__init__.py +48 -0
  134. src/linters/magic_numbers/config.py +82 -0
  135. src/linters/magic_numbers/context_analyzer.py +249 -0
  136. src/linters/magic_numbers/linter.py +462 -0
  137. src/linters/magic_numbers/python_analyzer.py +64 -0
  138. src/linters/magic_numbers/typescript_analyzer.py +215 -0
  139. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  140. src/linters/magic_numbers/violation_builder.py +98 -0
  141. src/linters/method_property/__init__.py +49 -0
  142. src/linters/method_property/config.py +138 -0
  143. src/linters/method_property/linter.py +414 -0
  144. src/linters/method_property/python_analyzer.py +473 -0
  145. src/linters/method_property/violation_builder.py +119 -0
  146. src/linters/nesting/__init__.py +6 -2
  147. src/linters/nesting/config.py +6 -3
  148. src/linters/nesting/linter.py +31 -34
  149. src/linters/nesting/python_analyzer.py +4 -0
  150. src/linters/nesting/typescript_analyzer.py +6 -11
  151. src/linters/nesting/violation_builder.py +1 -0
  152. src/linters/performance/__init__.py +91 -0
  153. src/linters/performance/config.py +43 -0
  154. src/linters/performance/constants.py +49 -0
  155. src/linters/performance/linter.py +149 -0
  156. src/linters/performance/python_analyzer.py +365 -0
  157. src/linters/performance/regex_analyzer.py +312 -0
  158. src/linters/performance/regex_linter.py +139 -0
  159. src/linters/performance/typescript_analyzer.py +236 -0
  160. src/linters/performance/violation_builder.py +160 -0
  161. src/linters/print_statements/__init__.py +53 -0
  162. src/linters/print_statements/config.py +78 -0
  163. src/linters/print_statements/linter.py +413 -0
  164. src/linters/print_statements/python_analyzer.py +153 -0
  165. src/linters/print_statements/typescript_analyzer.py +125 -0
  166. src/linters/print_statements/violation_builder.py +96 -0
  167. src/linters/srp/__init__.py +3 -3
  168. src/linters/srp/class_analyzer.py +11 -7
  169. src/linters/srp/config.py +12 -6
  170. src/linters/srp/heuristics.py +56 -22
  171. src/linters/srp/linter.py +47 -39
  172. src/linters/srp/python_analyzer.py +55 -20
  173. src/linters/srp/typescript_metrics_calculator.py +110 -50
  174. src/linters/stateless_class/__init__.py +25 -0
  175. src/linters/stateless_class/config.py +58 -0
  176. src/linters/stateless_class/linter.py +349 -0
  177. src/linters/stateless_class/python_analyzer.py +290 -0
  178. src/linters/stringly_typed/__init__.py +36 -0
  179. src/linters/stringly_typed/config.py +189 -0
  180. src/linters/stringly_typed/context_filter.py +451 -0
  181. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  182. src/linters/stringly_typed/ignore_checker.py +100 -0
  183. src/linters/stringly_typed/ignore_utils.py +51 -0
  184. src/linters/stringly_typed/linter.py +376 -0
  185. src/linters/stringly_typed/python/__init__.py +33 -0
  186. src/linters/stringly_typed/python/analyzer.py +348 -0
  187. src/linters/stringly_typed/python/call_tracker.py +175 -0
  188. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  189. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  190. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  191. src/linters/stringly_typed/python/constants.py +21 -0
  192. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  193. src/linters/stringly_typed/python/validation_detector.py +189 -0
  194. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  195. src/linters/stringly_typed/storage.py +620 -0
  196. src/linters/stringly_typed/storage_initializer.py +45 -0
  197. src/linters/stringly_typed/typescript/__init__.py +28 -0
  198. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  199. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  200. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  201. src/linters/stringly_typed/violation_generator.py +419 -0
  202. src/orchestrator/core.py +264 -16
  203. src/orchestrator/language_detector.py +5 -3
  204. src/templates/thailint_config_template.yaml +354 -0
  205. src/utils/project_root.py +138 -16
  206. thailint-0.15.3.dist-info/METADATA +187 -0
  207. thailint-0.15.3.dist-info/RECORD +226 -0
  208. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
  209. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  210. src/cli.py +0 -1055
  211. thailint-0.2.0.dist-info/METADATA +0 -980
  212. thailint-0.2.0.dist-info/RECORD +0 -75
  213. thailint-0.2.0.dist-info/entry_points.txt +0 -4
  214. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
@@ -8,7 +8,7 @@ Overview: Analyzes Python source files to extract code blocks for duplicate dete
8
8
  Filters out docstrings at the tokenization level to prevent false positive duplication
9
9
  detection on documentation strings.
10
10
 
11
- Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, ast, TokenHasher
11
+ Dependencies: BaseTokenAnalyzer, CodeBlock, DRYConfig, pathlib.Path, ast, token_hasher module
12
12
 
13
13
  Exports: PythonDuplicateAnalyzer class
14
14
 
@@ -17,6 +17,12 @@ Interfaces: PythonDuplicateAnalyzer.analyze(file_path: Path, content: str, confi
17
17
 
18
18
  Implementation: Uses custom tokenizer that filters docstrings before hashing
19
19
 
20
+ Suppressions:
21
+ - too-many-arguments,too-many-positional-arguments: Line processing with related params
22
+ - type:ignore[arg-type]: ast.get_docstring returns str|None, typing limitation
23
+ - srp.violation: Complex AST analysis algorithm for duplicate detection. See SRP Exception below.
24
+ - nesting.excessive-depth: analyze method uses nested loops for docstring extraction.
25
+
20
26
  SRP Exception: PythonDuplicateAnalyzer has 32 methods and 358 lines (exceeds max 8 methods/200 lines)
21
27
  Justification: Complex AST analysis algorithm for duplicate code detection with sophisticated
22
28
  false positive filtering. Methods form tightly coupled algorithm pipeline: docstring extraction,
@@ -29,18 +35,14 @@ SRP Exception: PythonDuplicateAnalyzer has 32 methods and 358 lines (exceeds max
29
35
  """
30
36
 
31
37
  import ast
32
- from collections.abc import Callable
33
38
  from pathlib import Path
34
- from typing import cast
35
39
 
40
+ from . import token_hasher
36
41
  from .base_token_analyzer import BaseTokenAnalyzer
37
42
  from .block_filter import BlockFilterRegistry, create_default_registry
38
43
  from .cache import CodeBlock
39
44
  from .config import DRYConfig
40
-
41
- # Type alias for AST nodes that have line number attributes
42
- # All stmt and expr nodes have lineno and end_lineno after parsing
43
- ASTWithLineNumbers = ast.stmt | ast.expr
45
+ from .single_statement_detector import SingleStatementDetector
44
46
 
45
47
 
46
48
  class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violation]
@@ -58,8 +60,12 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
58
60
  """
59
61
  super().__init__()
60
62
  self._filter_registry = filter_registry or create_default_registry()
63
+ # Single-statement detector is created per-analysis with cached AST data
64
+ self._statement_detector: SingleStatementDetector | None = None
61
65
 
62
- def analyze(self, file_path: Path, content: str, config: DRYConfig) -> list[CodeBlock]:
66
+ def analyze( # thailint: ignore[nesting.excessive-depth]
67
+ self, file_path: Path, content: str, config: DRYConfig
68
+ ) -> list[CodeBlock]:
63
69
  """Analyze Python file for duplicate code blocks, excluding docstrings.
64
70
 
65
71
  Args:
@@ -70,37 +76,72 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
70
76
  Returns:
71
77
  List of CodeBlock instances with hash values
72
78
  """
73
- # Get docstring line ranges
74
- docstring_ranges = self._get_docstring_ranges_from_content(content)
75
-
76
- # Tokenize with line number tracking
77
- lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
78
-
79
- # Generate rolling hash windows
80
- windows = self._rolling_hash_with_tracking(lines_with_numbers, config.min_duplicate_lines)
81
-
82
- blocks = []
83
- for hash_val, start_line, end_line, snippet in windows:
84
- # Skip blocks that are single logical statements
85
- # Check the original source code, not the normalized snippet
86
- if self._is_single_statement_in_source(content, start_line, end_line):
87
- continue
88
-
89
- block = CodeBlock(
90
- file_path=file_path,
91
- start_line=start_line,
92
- end_line=end_line,
93
- snippet=snippet,
94
- hash_value=hash_val,
79
+ # Performance optimization: Parse AST once and create detector with cached data
80
+ cached_ast = self._parse_content_safe(content)
81
+ line_to_nodes = SingleStatementDetector.build_line_to_node_index(cached_ast)
82
+ self._statement_detector = SingleStatementDetector(cached_ast, content, line_to_nodes)
83
+
84
+ try:
85
+ # Get docstring line ranges
86
+ docstring_ranges = self._get_docstring_ranges_from_content(content)
87
+
88
+ # Tokenize with line number tracking
89
+ lines_with_numbers = self._tokenize_with_line_numbers(content, docstring_ranges)
90
+
91
+ # Generate rolling hash windows
92
+ windows = self._rolling_hash_with_tracking(
93
+ lines_with_numbers, config.min_duplicate_lines
95
94
  )
96
95
 
97
- # Apply extensible filters (keyword arguments, imports, etc.)
98
- if self._filter_registry.should_filter_block(block, content):
99
- continue
96
+ return self._filter_valid_blocks(windows, file_path, content)
97
+ finally:
98
+ # Clear detector after analysis to avoid memory leaks
99
+ self._statement_detector = None
100
+
101
+ def _filter_valid_blocks(
102
+ self,
103
+ windows: list[tuple[int, int, int, str]],
104
+ file_path: Path,
105
+ content: str,
106
+ ) -> list[CodeBlock]:
107
+ """Filter hash windows and create valid CodeBlock instances."""
108
+ return [
109
+ block
110
+ for hash_val, start_line, end_line, snippet in windows
111
+ if (
112
+ block := self._create_block_if_valid(
113
+ file_path, content, hash_val, start_line, end_line, snippet
114
+ )
115
+ )
116
+ ]
117
+
118
+ def _create_block_if_valid( # pylint: disable=too-many-arguments,too-many-positional-arguments
119
+ self,
120
+ file_path: Path,
121
+ content: str,
122
+ hash_val: int,
123
+ start_line: int,
124
+ end_line: int,
125
+ snippet: str,
126
+ ) -> CodeBlock | None:
127
+ """Create CodeBlock if it passes all validation checks."""
128
+ if self._statement_detector and self._statement_detector.is_single_statement(
129
+ content, start_line, end_line
130
+ ):
131
+ return None
132
+
133
+ block = CodeBlock(
134
+ file_path=file_path,
135
+ start_line=start_line,
136
+ end_line=end_line,
137
+ snippet=snippet,
138
+ hash_value=hash_val,
139
+ )
100
140
 
101
- blocks.append(block)
141
+ if self._filter_registry.should_filter_block(block, content):
142
+ return None
102
143
 
103
- return blocks
144
+ return block
104
145
 
105
146
  def _get_docstring_ranges_from_content(self, content: str) -> set[int]:
106
147
  """Extract line numbers that are part of docstrings.
@@ -168,25 +209,44 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
168
209
  List of (original_line_number, normalized_code) tuples
169
210
  """
170
211
  lines_with_numbers = []
212
+ in_multiline_import = False
171
213
 
172
- for line_num, line in enumerate(content.split("\n"), start=1):
173
- # Skip docstring lines
174
- if line_num in docstring_lines:
175
- continue
214
+ non_docstring_lines = (
215
+ (line_num, line)
216
+ for line_num, line in enumerate(content.split("\n"), start=1)
217
+ if line_num not in docstring_lines
218
+ )
219
+ for line_num, line in non_docstring_lines:
220
+ in_multiline_import, normalized = self._normalize_and_filter_line(
221
+ line, in_multiline_import
222
+ )
223
+ if normalized is not None:
224
+ lines_with_numbers.append((line_num, normalized))
176
225
 
177
- # Use hasher's existing tokenization logic
178
- line = self._hasher._strip_comments(line) # pylint: disable=protected-access
179
- line = " ".join(line.split())
226
+ return lines_with_numbers
180
227
 
181
- if not line:
182
- continue
228
+ def _normalize_and_filter_line(
229
+ self, line: str, in_multiline_import: bool
230
+ ) -> tuple[bool, str | None]:
231
+ """Normalize line and check if it should be included.
183
232
 
184
- if self._hasher._is_import_statement(line): # pylint: disable=protected-access
185
- continue
233
+ Args:
234
+ line: Raw source line
235
+ in_multiline_import: Current multi-line import state
186
236
 
187
- lines_with_numbers.append((line_num, line))
237
+ Returns:
238
+ Tuple of (new_import_state, normalized_line or None if should skip)
239
+ """
240
+ normalized = token_hasher.normalize_line(line)
241
+ if not normalized:
242
+ return in_multiline_import, None
188
243
 
189
- return lines_with_numbers
244
+ new_state, should_skip = token_hasher.should_skip_import_line(
245
+ normalized, in_multiline_import
246
+ )
247
+ if should_skip:
248
+ return new_state, None
249
+ return new_state, normalized
190
250
 
191
251
  def _rolling_hash_with_tracking(
192
252
  self, lines_with_numbers: list[tuple[int, str]], window_size: int
@@ -220,14 +280,6 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
220
280
 
221
281
  return hashes
222
282
 
223
- def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
224
- """Check if a line range in the original source is a single logical statement."""
225
- tree = self._parse_content_safe(content)
226
- if tree is None:
227
- return False
228
-
229
- return self._check_overlapping_nodes(tree, start_line, end_line)
230
-
231
283
  @staticmethod
232
284
  def _parse_content_safe(content: str) -> ast.Module | None:
233
285
  """Parse content, returning None on syntax error."""
@@ -235,283 +287,3 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
235
287
  return ast.parse(content)
236
288
  except SyntaxError:
237
289
  return None
238
-
239
- def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
240
- """Check if any AST node overlaps and matches single-statement pattern."""
241
- for node in ast.walk(tree):
242
- if self._node_overlaps_and_matches(node, start_line, end_line):
243
- return True
244
- return False
245
-
246
- def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
247
- """Check if node overlaps with range and matches single-statement pattern."""
248
- if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
249
- return False
250
-
251
- overlaps = not (node.end_lineno < start_line or node.lineno > end_line)
252
- if not overlaps:
253
- return False
254
-
255
- return self._is_single_statement_pattern(node, start_line, end_line)
256
-
257
- def _is_single_statement_pattern(self, node: ast.AST, start_line: int, end_line: int) -> bool:
258
- """Check if an AST node represents a single-statement pattern to filter.
259
-
260
- Args:
261
- node: AST node that overlaps with the line range
262
- start_line: Starting line number (1-indexed)
263
- end_line: Ending line number (1-indexed)
264
-
265
- Returns:
266
- True if this node represents a single logical statement pattern
267
- """
268
- contains = self._node_contains_range(node, start_line, end_line)
269
- if contains is None:
270
- return False
271
-
272
- return self._dispatch_pattern_check(node, start_line, end_line, contains)
273
-
274
- def _node_contains_range(self, node: ast.AST, start_line: int, end_line: int) -> bool | None:
275
- """Check if node completely contains the range. Returns None if invalid."""
276
- if not self._has_valid_line_numbers(node):
277
- return None
278
- # Type narrowing: _has_valid_line_numbers ensures node has line numbers
279
- # Safe to cast after validation check above
280
- typed_node = cast(ASTWithLineNumbers, node)
281
- # Use type: ignore to suppress MyPy's inability to understand runtime validation
282
- return typed_node.lineno <= start_line and typed_node.end_lineno >= end_line # type: ignore[operator]
283
-
284
- @staticmethod
285
- def _has_valid_line_numbers(node: ast.AST) -> bool:
286
- """Check if node has valid line number attributes."""
287
- if not (hasattr(node, "lineno") and hasattr(node, "end_lineno")):
288
- return False
289
- return node.lineno is not None and node.end_lineno is not None
290
-
291
- def _dispatch_pattern_check(
292
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
293
- ) -> bool:
294
- """Dispatch to node-type-specific pattern checkers."""
295
- # Simple containment check for Expr nodes
296
- if isinstance(node, ast.Expr):
297
- return contains
298
-
299
- # Delegate to specialized checkers
300
- return self._check_specific_pattern(node, start_line, end_line, contains)
301
-
302
- def _check_specific_pattern(
303
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
304
- ) -> bool:
305
- """Check specific node types with their pattern rules."""
306
- if isinstance(node, ast.ClassDef):
307
- return self._check_class_def_pattern(node, start_line, end_line)
308
- if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
309
- return self._check_function_def_pattern(node, start_line, end_line)
310
- if isinstance(node, ast.Call):
311
- return self._check_call_pattern(node, start_line, end_line, contains)
312
- if isinstance(node, ast.Assign):
313
- return self._check_assign_pattern(node, start_line, end_line, contains)
314
- return False
315
-
316
- def _check_class_def_pattern(self, node: ast.ClassDef, start_line: int, end_line: int) -> bool:
317
- """Check if range is in class field definitions (not method bodies)."""
318
- first_method_line = self._find_first_method_line(node)
319
- class_start = self._get_class_start_with_decorators(node)
320
- return self._is_in_class_fields_area(
321
- class_start, start_line, end_line, first_method_line, node.end_lineno
322
- )
323
-
324
- @staticmethod
325
- def _find_first_method_line(node: ast.ClassDef) -> int | None:
326
- """Find line number of first method in class."""
327
- for item in node.body:
328
- if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
329
- return item.lineno
330
- return None
331
-
332
- @staticmethod
333
- def _get_class_start_with_decorators(node: ast.ClassDef) -> int:
334
- """Get class start line, including decorators if present."""
335
- if node.decorator_list:
336
- return min(d.lineno for d in node.decorator_list)
337
- return node.lineno
338
-
339
- @staticmethod
340
- def _is_in_class_fields_area(
341
- class_start: int,
342
- start_line: int,
343
- end_line: int,
344
- first_method_line: int | None,
345
- class_end_line: int | None,
346
- ) -> bool:
347
- """Check if range is in class fields area (before methods)."""
348
- if first_method_line is not None:
349
- return class_start <= start_line and end_line < first_method_line
350
- if class_end_line is not None:
351
- return class_start <= start_line and class_end_line >= end_line
352
- return False
353
-
354
- def _check_function_def_pattern(
355
- self, node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int
356
- ) -> bool:
357
- """Check if range is in function decorator pattern."""
358
- if not node.decorator_list:
359
- return False
360
-
361
- first_decorator_line = min(d.lineno for d in node.decorator_list)
362
- first_body_line = self._get_function_body_start(node)
363
-
364
- if first_body_line is None:
365
- return False
366
-
367
- return start_line >= first_decorator_line and end_line < first_body_line
368
-
369
- @staticmethod
370
- def _get_function_body_start(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int | None:
371
- """Get the line number where function body starts."""
372
- if not node.body or not hasattr(node.body[0], "lineno"):
373
- return None
374
- return node.body[0].lineno
375
-
376
- def _check_call_pattern(
377
- self, node: ast.Call, start_line: int, end_line: int, contains: bool
378
- ) -> bool:
379
- """Check if range is part of a function/constructor call."""
380
- return self._check_multiline_or_contained(node, start_line, end_line, contains)
381
-
382
- def _check_assign_pattern(
383
- self, node: ast.Assign, start_line: int, end_line: int, contains: bool
384
- ) -> bool:
385
- """Check if range is part of a multi-line assignment."""
386
- return self._check_multiline_or_contained(node, start_line, end_line, contains)
387
-
388
- def _check_multiline_or_contained(
389
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
390
- ) -> bool:
391
- """Check if node is multiline containing start, or single-line containing range."""
392
- if not self._has_valid_line_numbers(node):
393
- return False
394
-
395
- # Type narrowing: _has_valid_line_numbers ensures node has line numbers
396
- # Safe to cast after validation check above
397
- typed_node = cast(ASTWithLineNumbers, node)
398
- # Use type: ignore to suppress MyPy's inability to understand runtime validation
399
- is_multiline = typed_node.lineno < typed_node.end_lineno # type: ignore[operator]
400
- if is_multiline:
401
- return typed_node.lineno <= start_line <= typed_node.end_lineno # type: ignore[operator]
402
- return contains
403
-
404
- def _is_standalone_single_statement(
405
- self, lines: list[str], start_line: int, end_line: int
406
- ) -> bool:
407
- """Check if the exact range parses as a single statement on its own."""
408
- source_lines = lines[start_line - 1 : end_line]
409
- source_snippet = "\n".join(source_lines)
410
-
411
- try:
412
- tree = ast.parse(source_snippet)
413
- return len(tree.body) == 1
414
- except SyntaxError:
415
- return False
416
-
417
- def _check_ast_context( # pylint: disable=too-many-arguments,too-many-positional-arguments
418
- self,
419
- lines: list[str],
420
- start_line: int,
421
- end_line: int,
422
- lookback: int,
423
- lookforward: int,
424
- predicate: Callable[[ast.Module, int], bool],
425
- ) -> bool:
426
- """Generic helper for AST-based context checking.
427
-
428
- Args:
429
- lines: Source file lines
430
- start_line: Starting line number (1-indexed)
431
- end_line: Ending line number (1-indexed)
432
- lookback: Number of lines to look backward
433
- lookforward: Number of lines to look forward
434
- predicate: Function that takes AST tree and returns bool
435
-
436
- Returns:
437
- True if predicate returns True for the parsed context
438
- """
439
- lookback_start = max(0, start_line - lookback)
440
- lookforward_end = min(len(lines), end_line + lookforward)
441
-
442
- context_lines = lines[lookback_start:lookforward_end]
443
- context = "\n".join(context_lines)
444
-
445
- try:
446
- tree = ast.parse(context)
447
- return predicate(tree, lookback_start)
448
- except SyntaxError:
449
- pass
450
-
451
- return False
452
-
453
- def _is_part_of_decorator(self, lines: list[str], start_line: int, end_line: int) -> bool:
454
- """Check if lines are part of a decorator + function definition.
455
-
456
- A decorator pattern is @something(...) followed by def/class.
457
- """
458
-
459
- def has_decorators(tree: ast.Module, _lookback_start: int) -> bool:
460
- """Check if any function or class in the tree has decorators."""
461
- for stmt in tree.body:
462
- if isinstance(stmt, (ast.FunctionDef, ast.ClassDef)) and stmt.decorator_list:
463
- return True
464
- return False
465
-
466
- return self._check_ast_context(lines, start_line, end_line, 10, 10, has_decorators)
467
-
468
- def _is_part_of_function_call(self, lines: list[str], start_line: int, end_line: int) -> bool:
469
- """Check if lines are arguments inside a function/constructor call.
470
-
471
- Detects patterns like:
472
- obj = Constructor(
473
- arg1=value1,
474
- arg2=value2,
475
- )
476
- """
477
-
478
- def is_single_non_function_statement(tree: ast.Module, _lookback_start: int) -> bool:
479
- """Check if context has exactly one statement that's not a function/class def."""
480
- return len(tree.body) == 1 and not isinstance(
481
- tree.body[0], (ast.FunctionDef, ast.ClassDef)
482
- )
483
-
484
- return self._check_ast_context(
485
- lines, start_line, end_line, 10, 10, is_single_non_function_statement
486
- )
487
-
488
- def _is_part_of_class_body(self, lines: list[str], start_line: int, end_line: int) -> bool:
489
- """Check if lines are field definitions inside a class body.
490
-
491
- Detects patterns like:
492
- class Foo:
493
- field1: Type1
494
- field2: Type2
495
- """
496
-
497
- def is_within_class_body(tree: ast.Module, lookback_start: int) -> bool:
498
- """Check if flagged range falls within a class body."""
499
- for stmt in tree.body:
500
- if not isinstance(stmt, ast.ClassDef):
501
- continue
502
-
503
- # Adjust line numbers: stmt.lineno is relative to context
504
- # We need to convert back to original file line numbers
505
- class_start_in_context = stmt.lineno
506
- class_end_in_context = stmt.end_lineno if stmt.end_lineno else stmt.lineno
507
-
508
- # Convert to original file line numbers (1-indexed)
509
- class_start_original = lookback_start + class_start_in_context
510
- class_end_original = lookback_start + class_end_in_context
511
-
512
- # Check if the flagged range overlaps with class body
513
- if start_line >= class_start_original and end_line <= class_end_original:
514
- return True
515
- return False
516
-
517
- return self._check_ast_context(lines, start_line, end_line, 10, 5, is_within_class_body)
@@ -0,0 +1,100 @@
1
+ """
2
+ Purpose: Extract Python module-level constants using AST parsing
3
+
4
+ Scope: Python constant extraction for duplicate constants detection
5
+
6
+ Overview: Extracts module-level constant definitions from Python source code using the AST module.
7
+ Identifies constants as module-level assignments where the target name matches the ALL_CAPS
8
+ naming convention (e.g., API_TIMEOUT = 30). Excludes private constants (leading underscore),
9
+ class-level constants, and function-level constants to focus on public module constants that
10
+ should be consolidated across files.
11
+
12
+ Dependencies: Python ast module, re for pattern matching, ConstantInfo from constant module
13
+
14
+ Exports: extract_python_constants function
15
+
16
+ Interfaces: extract_python_constants(content: str) -> list[ConstantInfo]
17
+
18
+ Implementation: AST-based parsing with module-level filtering and ALL_CAPS regex matching
19
+ """
20
+
21
+ import ast
22
+
23
+ from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
24
+
25
# Placeholder text used for container literals, whose contents are not rendered
CONTAINER_REPRESENTATIONS: dict[type[ast.expr], str] = {
    ast.List: "[...]",
    ast.Dict: "{...}",
    ast.Tuple: "(...)",
}
27
+
28
+
29
def extract_python_constants(content: str) -> list[ConstantInfo]:
    """Extract constants from Python source code.

    Only statements directly in the module body are inspected, so assignments
    nested inside classes or functions are never visited.

    Args:
        content: Python source code as string

    Returns:
        List of ConstantInfo for module-level constants; an empty list when the
        source cannot be parsed
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers source containing null bytes on interpreters that
        # report it that way instead of as a SyntaxError (before Python 3.12).
        return []
    constants: list[ConstantInfo] = []
    for node in tree.body:
        constants.extend(_extract_from_node(node))
    return constants
46
+
47
+
48
+ def _extract_from_node(node: ast.stmt) -> list[ConstantInfo]:
49
+ """Extract constants from a single AST node."""
50
+ if isinstance(node, ast.Assign):
51
+ return _extract_from_assign(node)
52
+ if isinstance(node, ast.AnnAssign):
53
+ return _extract_from_ann_assign(node)
54
+ return []
55
+
56
+
57
+ def _extract_from_assign(node: ast.Assign) -> list[ConstantInfo]:
58
+ """Extract constants from a simple assignment."""
59
+ return [info for t in node.targets if (info := _to_const_info(t, node.value, node.lineno))]
60
+
61
+
62
+ def _extract_from_ann_assign(node: ast.AnnAssign) -> list[ConstantInfo]:
63
+ """Extract constants from an annotated assignment."""
64
+ if node.value is None:
65
+ return []
66
+ info = _to_const_info(node.target, node.value, node.lineno)
67
+ return [info] if info else []
68
+
69
+
70
+ def _to_const_info(target: ast.expr, value: ast.expr, lineno: int) -> ConstantInfo | None:
71
+ """Extract constant info from target and value."""
72
+ if not isinstance(target, ast.Name):
73
+ return None
74
+ name = target.id
75
+ if not _is_constant_name(name):
76
+ return None
77
+ return ConstantInfo(name=name, line_number=lineno, value=_get_value_string(value))
78
+
79
+
80
+ def _is_constant_name(name: str) -> bool:
81
+ """Check if name matches constant naming convention."""
82
+ return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))
83
+
84
+
85
+ def _get_value_string(value: ast.expr) -> str | None:
86
+ """Get string representation of a value expression."""
87
+ if isinstance(value, ast.Constant):
88
+ return repr(value.value)
89
+ if isinstance(value, ast.Name):
90
+ return value.id
91
+ if isinstance(value, ast.Call):
92
+ return _call_to_string(value)
93
+ return CONTAINER_REPRESENTATIONS.get(type(value))
94
+
95
+
96
+ def _call_to_string(node: ast.Call) -> str:
97
+ """Convert call expression to string."""
98
+ if isinstance(node.func, ast.Name):
99
+ return f"{node.func.id}(...)"
100
+ return "call(...)"