thailint 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. src/__init__.py +1 -0
  2. src/cli/__init__.py +27 -0
  3. src/cli/__main__.py +22 -0
  4. src/cli/config.py +478 -0
  5. src/cli/linters/__init__.py +58 -0
  6. src/cli/linters/code_patterns.py +372 -0
  7. src/cli/linters/code_smells.py +450 -0
  8. src/cli/linters/documentation.py +155 -0
  9. src/cli/linters/shared.py +89 -0
  10. src/cli/linters/structure.py +313 -0
  11. src/cli/linters/structure_quality.py +316 -0
  12. src/cli/main.py +120 -0
  13. src/cli/utils.py +395 -0
  14. src/cli_main.py +34 -0
  15. src/core/types.py +13 -0
  16. src/core/violation_utils.py +69 -0
  17. src/linter_config/ignore.py +32 -16
  18. src/linters/collection_pipeline/linter.py +2 -2
  19. src/linters/dry/block_filter.py +97 -1
  20. src/linters/dry/cache.py +94 -6
  21. src/linters/dry/config.py +47 -10
  22. src/linters/dry/constant.py +92 -0
  23. src/linters/dry/constant_matcher.py +214 -0
  24. src/linters/dry/constant_violation_builder.py +98 -0
  25. src/linters/dry/linter.py +89 -48
  26. src/linters/dry/python_analyzer.py +12 -415
  27. src/linters/dry/python_constant_extractor.py +101 -0
  28. src/linters/dry/single_statement_detector.py +415 -0
  29. src/linters/dry/token_hasher.py +5 -5
  30. src/linters/dry/typescript_analyzer.py +5 -354
  31. src/linters/dry/typescript_constant_extractor.py +134 -0
  32. src/linters/dry/typescript_statement_detector.py +255 -0
  33. src/linters/dry/typescript_value_extractor.py +66 -0
  34. src/linters/file_header/linter.py +2 -2
  35. src/linters/file_placement/linter.py +2 -2
  36. src/linters/file_placement/pattern_matcher.py +19 -5
  37. src/linters/magic_numbers/linter.py +8 -67
  38. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  39. src/linters/nesting/linter.py +12 -9
  40. src/linters/print_statements/linter.py +7 -24
  41. src/linters/srp/class_analyzer.py +9 -9
  42. src/linters/srp/heuristics.py +2 -2
  43. src/linters/srp/linter.py +2 -2
  44. src/linters/stateless_class/linter.py +2 -2
  45. src/linters/stringly_typed/__init__.py +36 -0
  46. src/linters/stringly_typed/config.py +190 -0
  47. src/linters/stringly_typed/context_filter.py +451 -0
  48. src/linters/stringly_typed/function_call_violation_builder.py +137 -0
  49. src/linters/stringly_typed/ignore_checker.py +102 -0
  50. src/linters/stringly_typed/ignore_utils.py +51 -0
  51. src/linters/stringly_typed/linter.py +344 -0
  52. src/linters/stringly_typed/python/__init__.py +33 -0
  53. src/linters/stringly_typed/python/analyzer.py +344 -0
  54. src/linters/stringly_typed/python/call_tracker.py +172 -0
  55. src/linters/stringly_typed/python/comparison_tracker.py +252 -0
  56. src/linters/stringly_typed/python/condition_extractor.py +131 -0
  57. src/linters/stringly_typed/python/conditional_detector.py +176 -0
  58. src/linters/stringly_typed/python/constants.py +21 -0
  59. src/linters/stringly_typed/python/match_analyzer.py +88 -0
  60. src/linters/stringly_typed/python/validation_detector.py +186 -0
  61. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  62. src/linters/stringly_typed/storage.py +630 -0
  63. src/linters/stringly_typed/storage_initializer.py +45 -0
  64. src/linters/stringly_typed/typescript/__init__.py +28 -0
  65. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  66. src/linters/stringly_typed/typescript/call_tracker.py +329 -0
  67. src/linters/stringly_typed/typescript/comparison_tracker.py +372 -0
  68. src/linters/stringly_typed/violation_generator.py +376 -0
  69. src/orchestrator/core.py +241 -12
  70. {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/METADATA +9 -3
  71. {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/RECORD +74 -28
  72. thailint-0.12.0.dist-info/entry_points.txt +4 -0
  73. src/cli.py +0 -2141
  74. thailint-0.10.0.dist-info/entry_points.txt +0 -4
  75. {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/WHEEL +0 -0
  76. {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -29,22 +29,13 @@ SRP Exception: PythonDuplicateAnalyzer has 32 methods and 358 lines (exceeds max
29
29
  """
30
30
 
31
31
  import ast
32
- from collections.abc import Callable
33
32
  from pathlib import Path
34
- from typing import cast
35
33
 
36
34
  from .base_token_analyzer import BaseTokenAnalyzer
37
35
  from .block_filter import BlockFilterRegistry, create_default_registry
38
36
  from .cache import CodeBlock
39
37
  from .config import DRYConfig
40
-
41
- # AST context checking constants
42
- AST_LOOKBACK_LINES = 10
43
- AST_LOOKFORWARD_LINES = 5
44
-
45
- # Type alias for AST nodes that have line number attributes
46
- # All stmt and expr nodes have lineno and end_lineno after parsing
47
- ASTWithLineNumbers = ast.stmt | ast.expr
38
+ from .single_statement_detector import SingleStatementDetector
48
39
 
49
40
 
50
41
  class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violation]
@@ -62,11 +53,8 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
62
53
  """
63
54
  super().__init__()
64
55
  self._filter_registry = filter_registry or create_default_registry()
65
- # Performance optimization: Cache parsed AST to avoid re-parsing for each hash window
66
- self._cached_ast: ast.Module | None = None
67
- self._cached_content: str | None = None
68
- # Performance optimization: Line-to-node index for O(1) lookups instead of O(n) ast.walk()
69
- self._line_to_nodes: dict[int, list[ast.AST]] | None = None
56
+ # Single-statement detector is created per-analysis with cached AST data
57
+ self._statement_detector: SingleStatementDetector | None = None
70
58
 
71
59
  def analyze( # thailint: ignore[nesting.excessive-depth]
72
60
  self, file_path: Path, content: str, config: DRYConfig
@@ -81,12 +69,10 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
81
69
  Returns:
82
70
  List of CodeBlock instances with hash values
83
71
  """
84
- # Performance optimization: Parse AST once and cache for _is_single_statement_in_source() calls
85
- self._cached_ast = self._parse_content_safe(content)
86
- self._cached_content = content
87
-
88
- # Performance optimization: Build line-to-node index for O(1) lookups
89
- self._line_to_nodes = self._build_line_to_node_index(self._cached_ast)
72
+ # Performance optimization: Parse AST once and create detector with cached data
73
+ cached_ast = self._parse_content_safe(content)
74
+ line_to_nodes = SingleStatementDetector.build_line_to_node_index(cached_ast)
75
+ self._statement_detector = SingleStatementDetector(cached_ast, content, line_to_nodes)
90
76
 
91
77
  try:
92
78
  # Get docstring line ranges
@@ -102,10 +88,8 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
102
88
 
103
89
  return self._filter_valid_blocks(windows, file_path, content)
104
90
  finally:
105
- # Clear cache after analysis to avoid memory leaks
106
- self._cached_ast = None
107
- self._cached_content = None
108
- self._line_to_nodes = None
91
+ # Clear detector after analysis to avoid memory leaks
92
+ self._statement_detector = None
109
93
 
110
94
  def _filter_valid_blocks(
111
95
  self,
@@ -133,7 +117,9 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
133
117
  snippet: str,
134
118
  ) -> CodeBlock | None:
135
119
  """Create CodeBlock if it passes all validation checks."""
136
- if self._is_single_statement_in_source(content, start_line, end_line):
120
+ if self._statement_detector and self._statement_detector.is_single_statement(
121
+ content, start_line, end_line
122
+ ):
137
123
  return None
138
124
 
139
125
  block = CodeBlock(
@@ -286,24 +272,6 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
286
272
 
287
273
  return hashes
288
274
 
289
- def _is_single_statement_in_source(self, content: str, start_line: int, end_line: int) -> bool:
290
- """Check if a line range in the original source is a single logical statement.
291
-
292
- Performance optimization: Uses cached AST if available (set by analyze() method)
293
- to avoid re-parsing the entire file for each hash window check.
294
- """
295
- # Use cached AST if available and content matches
296
- tree: ast.Module | None
297
- if self._cached_ast is not None and content == self._cached_content:
298
- tree = self._cached_ast
299
- else:
300
- # Fallback: parse content (used by tests or standalone calls)
301
- tree = self._parse_content_safe(content)
302
- if tree is None:
303
- return False
304
-
305
- return self._check_overlapping_nodes(tree, start_line, end_line)
306
-
307
275
  @staticmethod
308
276
  def _parse_content_safe(content: str) -> ast.Module | None:
309
277
  """Parse content, returning None on syntax error."""
@@ -311,374 +279,3 @@ class PythonDuplicateAnalyzer(BaseTokenAnalyzer): # thailint: ignore[srp.violat
311
279
  return ast.parse(content)
312
280
  except SyntaxError:
313
281
  return None
314
-
315
- @staticmethod
316
- def _build_line_to_node_index(tree: ast.Module | None) -> dict[int, list[ast.AST]] | None:
317
- """Build an index mapping each line number to overlapping AST nodes.
318
-
319
- Performance optimization: This allows O(1) lookups instead of O(n) ast.walk() calls.
320
- For a file with 5,144 nodes and 673 hash windows, this reduces 3.46M node operations
321
- to just ~3,365 relevant node checks (99.9% reduction).
322
-
323
- Args:
324
- tree: Parsed AST tree (None if parsing failed)
325
-
326
- Returns:
327
- Dictionary mapping line numbers to list of AST nodes overlapping that line,
328
- or None if tree is None
329
- """
330
- if tree is None:
331
- return None
332
-
333
- line_to_nodes: dict[int, list[ast.AST]] = {}
334
- for node in ast.walk(tree):
335
- if PythonDuplicateAnalyzer._node_has_line_info(node):
336
- PythonDuplicateAnalyzer._add_node_to_index(node, line_to_nodes)
337
-
338
- return line_to_nodes
339
-
340
- @staticmethod
341
- def _node_has_line_info(node: ast.AST) -> bool:
342
- """Check if node has valid line number information."""
343
- if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
344
- return False
345
- return node.lineno is not None and node.end_lineno is not None
346
-
347
- @staticmethod
348
- def _add_node_to_index(node: ast.AST, line_to_nodes: dict[int, list[ast.AST]]) -> None:
349
- """Add node to all lines it overlaps in the index."""
350
- for line_num in range(node.lineno, node.end_lineno + 1): # type: ignore[attr-defined]
351
- if line_num not in line_to_nodes:
352
- line_to_nodes[line_num] = []
353
- line_to_nodes[line_num].append(node)
354
-
355
- def _check_overlapping_nodes(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
356
- """Check if any AST node overlaps and matches single-statement pattern.
357
-
358
- Performance optimization: Use line-to-node index for O(1) lookups instead of O(n) ast.walk().
359
- """
360
- if self._line_to_nodes is not None:
361
- return self._check_nodes_via_index(start_line, end_line)
362
- return self._check_nodes_via_walk(tree, start_line, end_line)
363
-
364
- def _check_nodes_via_index(self, start_line: int, end_line: int) -> bool:
365
- """Check nodes using line-to-node index for O(1) lookups."""
366
- candidates = self._collect_candidate_nodes_from_index(start_line, end_line)
367
- return self._any_node_matches_pattern(candidates, start_line, end_line)
368
-
369
- def _collect_candidate_nodes_from_index(self, start_line: int, end_line: int) -> set[ast.AST]:
370
- """Collect unique nodes that overlap with the line range from index."""
371
- candidate_nodes: set[ast.AST] = set()
372
- for line_num in range(start_line, end_line + 1):
373
- if self._line_to_nodes and line_num in self._line_to_nodes:
374
- candidate_nodes.update(self._line_to_nodes[line_num])
375
- return candidate_nodes
376
-
377
- def _any_node_matches_pattern(
378
- self, nodes: set[ast.AST], start_line: int, end_line: int
379
- ) -> bool:
380
- """Check if any node matches single-statement pattern."""
381
- for node in nodes:
382
- if self._is_single_statement_pattern(node, start_line, end_line):
383
- return True
384
- return False
385
-
386
- def _check_nodes_via_walk(self, tree: ast.Module, start_line: int, end_line: int) -> bool:
387
- """Check nodes using ast.walk() fallback for tests or standalone calls."""
388
- for node in ast.walk(tree):
389
- if self._node_matches_via_walk(node, start_line, end_line):
390
- return True
391
- return False
392
-
393
- def _node_matches_via_walk(self, node: ast.AST, start_line: int, end_line: int) -> bool:
394
- """Check if a single node overlaps and matches pattern."""
395
- if not self._node_overlaps_range(node, start_line, end_line):
396
- return False
397
- return self._is_single_statement_pattern(node, start_line, end_line)
398
-
399
- @staticmethod
400
- def _node_overlaps_range(node: ast.AST, start_line: int, end_line: int) -> bool:
401
- """Check if node overlaps with the given line range."""
402
- if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
403
- return False
404
- node_end = node.end_lineno
405
- node_start = node.lineno
406
- return not (node_end < start_line or node_start > end_line)
407
-
408
- def _node_overlaps_and_matches(self, node: ast.AST, start_line: int, end_line: int) -> bool:
409
- """Check if node overlaps with range and matches single-statement pattern."""
410
- if not hasattr(node, "lineno") or not hasattr(node, "end_lineno"):
411
- return False
412
-
413
- overlaps = not (node.end_lineno < start_line or node.lineno > end_line)
414
- if not overlaps:
415
- return False
416
-
417
- return self._is_single_statement_pattern(node, start_line, end_line)
418
-
419
- def _is_single_statement_pattern(self, node: ast.AST, start_line: int, end_line: int) -> bool:
420
- """Check if an AST node represents a single-statement pattern to filter.
421
-
422
- Args:
423
- node: AST node that overlaps with the line range
424
- start_line: Starting line number (1-indexed)
425
- end_line: Ending line number (1-indexed)
426
-
427
- Returns:
428
- True if this node represents a single logical statement pattern
429
- """
430
- contains = self._node_contains_range(node, start_line, end_line)
431
- if contains is None:
432
- return False
433
-
434
- return self._dispatch_pattern_check(node, start_line, end_line, contains)
435
-
436
- def _node_contains_range(self, node: ast.AST, start_line: int, end_line: int) -> bool | None:
437
- """Check if node completely contains the range. Returns None if invalid."""
438
- if not self._has_valid_line_numbers(node):
439
- return None
440
- # Type narrowing: _has_valid_line_numbers ensures node has line numbers
441
- # Safe to cast after validation check above
442
- typed_node = cast(ASTWithLineNumbers, node)
443
- # Use type: ignore to suppress MyPy's inability to understand runtime validation
444
- return typed_node.lineno <= start_line and typed_node.end_lineno >= end_line # type: ignore[operator]
445
-
446
- @staticmethod
447
- def _has_valid_line_numbers(node: ast.AST) -> bool:
448
- """Check if node has valid line number attributes."""
449
- if not (hasattr(node, "lineno") and hasattr(node, "end_lineno")):
450
- return False
451
- return node.lineno is not None and node.end_lineno is not None
452
-
453
- def _dispatch_pattern_check(
454
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
455
- ) -> bool:
456
- """Dispatch to node-type-specific pattern checkers."""
457
- # Simple containment check for Expr nodes
458
- if isinstance(node, ast.Expr):
459
- return contains
460
-
461
- # Delegate to specialized checkers
462
- return self._check_specific_pattern(node, start_line, end_line, contains)
463
-
464
- def _check_specific_pattern(
465
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
466
- ) -> bool:
467
- """Check specific node types with their pattern rules."""
468
- if isinstance(node, ast.ClassDef):
469
- return self._check_class_def_pattern(node, start_line, end_line)
470
- if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
471
- return self._check_function_def_pattern(node, start_line, end_line)
472
- if isinstance(node, ast.Call):
473
- return self._check_call_pattern(node, start_line, end_line, contains)
474
- if isinstance(node, ast.Assign):
475
- return self._check_assign_pattern(node, start_line, end_line, contains)
476
- return False
477
-
478
- def _check_class_def_pattern(self, node: ast.ClassDef, start_line: int, end_line: int) -> bool:
479
- """Check if range is in class field definitions (not method bodies)."""
480
- first_method_line = self._find_first_method_line(node)
481
- class_start = self._get_class_start_with_decorators(node)
482
- return self._is_in_class_fields_area(
483
- class_start, start_line, end_line, first_method_line, node.end_lineno
484
- )
485
-
486
- @staticmethod
487
- def _find_first_method_line(node: ast.ClassDef) -> int | None:
488
- """Find line number of first method in class."""
489
- for item in node.body:
490
- if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
491
- return item.lineno
492
- return None
493
-
494
- @staticmethod
495
- def _get_class_start_with_decorators(node: ast.ClassDef) -> int:
496
- """Get class start line, including decorators if present."""
497
- if node.decorator_list:
498
- return min(d.lineno for d in node.decorator_list)
499
- return node.lineno
500
-
501
- @staticmethod
502
- def _is_in_class_fields_area(
503
- class_start: int,
504
- start_line: int,
505
- end_line: int,
506
- first_method_line: int | None,
507
- class_end_line: int | None,
508
- ) -> bool:
509
- """Check if range is in class fields area (before methods)."""
510
- if first_method_line is not None:
511
- return class_start <= start_line and end_line < first_method_line
512
- if class_end_line is not None:
513
- return class_start <= start_line and class_end_line >= end_line
514
- return False
515
-
516
- def _check_function_def_pattern(
517
- self, node: ast.FunctionDef | ast.AsyncFunctionDef, start_line: int, end_line: int
518
- ) -> bool:
519
- """Check if range is in function decorator pattern."""
520
- if not node.decorator_list:
521
- return False
522
-
523
- first_decorator_line = min(d.lineno for d in node.decorator_list)
524
- first_body_line = self._get_function_body_start(node)
525
-
526
- if first_body_line is None:
527
- return False
528
-
529
- return start_line >= first_decorator_line and end_line < first_body_line
530
-
531
- @staticmethod
532
- def _get_function_body_start(node: ast.FunctionDef | ast.AsyncFunctionDef) -> int | None:
533
- """Get the line number where function body starts."""
534
- if not node.body or not hasattr(node.body[0], "lineno"):
535
- return None
536
- return node.body[0].lineno
537
-
538
- def _check_call_pattern(
539
- self, node: ast.Call, start_line: int, end_line: int, contains: bool
540
- ) -> bool:
541
- """Check if range is part of a function/constructor call."""
542
- return self._check_multiline_or_contained(node, start_line, end_line, contains)
543
-
544
- def _check_assign_pattern(
545
- self, node: ast.Assign, start_line: int, end_line: int, contains: bool
546
- ) -> bool:
547
- """Check if range is part of a multi-line assignment."""
548
- return self._check_multiline_or_contained(node, start_line, end_line, contains)
549
-
550
- def _check_multiline_or_contained(
551
- self, node: ast.AST, start_line: int, end_line: int, contains: bool
552
- ) -> bool:
553
- """Check if node is multiline containing start, or single-line containing range."""
554
- if not self._has_valid_line_numbers(node):
555
- return False
556
-
557
- # Type narrowing: _has_valid_line_numbers ensures node has line numbers
558
- # Safe to cast after validation check above
559
- typed_node = cast(ASTWithLineNumbers, node)
560
- # Use type: ignore to suppress MyPy's inability to understand runtime validation
561
- is_multiline = typed_node.lineno < typed_node.end_lineno # type: ignore[operator]
562
- if is_multiline:
563
- return typed_node.lineno <= start_line <= typed_node.end_lineno # type: ignore[operator]
564
- return contains
565
-
566
- def _is_standalone_single_statement(
567
- self, lines: list[str], start_line: int, end_line: int
568
- ) -> bool:
569
- """Check if the exact range parses as a single statement on its own."""
570
- source_lines = lines[start_line - 1 : end_line]
571
- source_snippet = "\n".join(source_lines)
572
-
573
- try:
574
- tree = ast.parse(source_snippet)
575
- return len(tree.body) == 1
576
- except SyntaxError:
577
- return False
578
-
579
- def _check_ast_context( # pylint: disable=too-many-arguments,too-many-positional-arguments
580
- self,
581
- lines: list[str],
582
- start_line: int,
583
- end_line: int,
584
- lookback: int,
585
- lookforward: int,
586
- predicate: Callable[[ast.Module, int], bool],
587
- ) -> bool:
588
- """Generic helper for AST-based context checking.
589
-
590
- Args:
591
- lines: Source file lines
592
- start_line: Starting line number (1-indexed)
593
- end_line: Ending line number (1-indexed)
594
- lookback: Number of lines to look backward
595
- lookforward: Number of lines to look forward
596
- predicate: Function that takes AST tree and returns bool
597
-
598
- Returns:
599
- True if predicate returns True for the parsed context
600
- """
601
- lookback_start = max(0, start_line - lookback)
602
- lookforward_end = min(len(lines), end_line + lookforward)
603
-
604
- context_lines = lines[lookback_start:lookforward_end]
605
- context = "\n".join(context_lines)
606
-
607
- try:
608
- tree = ast.parse(context)
609
- return predicate(tree, lookback_start)
610
- except SyntaxError:
611
- pass
612
-
613
- return False
614
-
615
- def _is_part_of_decorator(self, lines: list[str], start_line: int, end_line: int) -> bool:
616
- """Check if lines are part of a decorator + function definition.
617
-
618
- A decorator pattern is @something(...) followed by def/class.
619
- """
620
-
621
- def has_decorators(tree: ast.Module, _lookback_start: int) -> bool:
622
- """Check if any function or class in the tree has decorators."""
623
- for stmt in tree.body:
624
- if isinstance(stmt, (ast.FunctionDef, ast.ClassDef)) and stmt.decorator_list:
625
- return True
626
- return False
627
-
628
- return self._check_ast_context(lines, start_line, end_line, 10, 10, has_decorators)
629
-
630
- def _is_part_of_function_call(self, lines: list[str], start_line: int, end_line: int) -> bool:
631
- """Check if lines are arguments inside a function/constructor call.
632
-
633
- Detects patterns like:
634
- obj = Constructor(
635
- arg1=value1,
636
- arg2=value2,
637
- )
638
- """
639
-
640
- def is_single_non_function_statement(tree: ast.Module, _lookback_start: int) -> bool:
641
- """Check if context has exactly one statement that's not a function/class def."""
642
- return len(tree.body) == 1 and not isinstance(
643
- tree.body[0], (ast.FunctionDef, ast.ClassDef)
644
- )
645
-
646
- return self._check_ast_context(
647
- lines, start_line, end_line, 10, 10, is_single_non_function_statement
648
- )
649
-
650
- def _is_part_of_class_body(self, lines: list[str], start_line: int, end_line: int) -> bool:
651
- """Check if lines are field definitions inside a class body.
652
-
653
- Detects patterns like:
654
- class Foo:
655
- field1: Type1
656
- field2: Type2
657
- """
658
-
659
- def is_within_class_body(tree: ast.Module, lookback_start: int) -> bool:
660
- """Check if flagged range falls within a class body."""
661
- class_defs = (s for s in tree.body if isinstance(s, ast.ClassDef))
662
- for stmt in class_defs:
663
- # Adjust line numbers: stmt.lineno is relative to context
664
- # We need to convert back to original file line numbers
665
- class_start_in_context = stmt.lineno
666
- class_end_in_context = stmt.end_lineno if stmt.end_lineno else stmt.lineno
667
-
668
- # Convert to original file line numbers (1-indexed)
669
- class_start_original = lookback_start + class_start_in_context
670
- class_end_original = lookback_start + class_end_in_context
671
-
672
- # Check if the flagged range overlaps with class body
673
- if start_line >= class_start_original and end_line <= class_end_original:
674
- return True
675
- return False
676
-
677
- return self._check_ast_context(
678
- lines,
679
- start_line,
680
- end_line,
681
- AST_LOOKBACK_LINES,
682
- AST_LOOKFORWARD_LINES,
683
- is_within_class_body,
684
- )
@@ -0,0 +1,101 @@
1
+ """
2
+ Purpose: Extract Python module-level constants using AST parsing
3
+
4
+ Scope: Python constant extraction for duplicate constants detection
5
+
6
+ Overview: Extracts module-level constant definitions from Python source code using the AST module.
7
+ Identifies constants as module-level assignments where the target name matches the ALL_CAPS
8
+ naming convention (e.g., API_TIMEOUT = 30). Excludes private constants (leading underscore),
9
+ class-level constants, and function-level constants to focus on public module constants that
10
+ should be consolidated across files.
11
+
12
+ Dependencies: Python ast module; CONSTANT_NAME_PATTERN and ConstantInfo from the constant module
13
+
14
+ Exports: PythonConstantExtractor class
15
+
16
+ Interfaces: PythonConstantExtractor.extract(content: str) -> list[ConstantInfo]
17
+
18
+ Implementation: AST-based parsing with module-level filtering and ALL_CAPS regex matching
19
+ """
20
+
21
+ import ast
22
+
23
+ from .constant import CONSTANT_NAME_PATTERN, ConstantInfo
24
+
25
# Placeholder strings reported for container literals instead of their contents.
# _get_value_string looks entries up by the exact node type (type(value)), so
# any expression type not listed here yields None from that lookup.
CONTAINER_REPRESENTATIONS = {ast.List: "[...]", ast.Dict: "{...}", ast.Tuple: "(...)"}
27
+
28
+
29
class PythonConstantExtractor:
    """Collect constant assignments found at the top level of a Python module."""

    def extract(self, content: str) -> list[ConstantInfo]:
        """Parse ``content`` and return the constants assigned at module level.

        Only the statements directly in the module body are inspected.
        Source that fails to parse yields an empty list.
        """
        try:
            module = ast.parse(content)
        except SyntaxError:
            return []
        return [info for stmt in module.body for info in self._extract_from_node(stmt)]

    def _extract_from_node(self, node: ast.stmt) -> list[ConstantInfo]:
        """Route a statement to the matching assignment handler."""
        if isinstance(node, ast.AnnAssign):
            return self._extract_from_ann_assign(node)
        if isinstance(node, ast.Assign):
            return self._extract_from_assign(node)
        # Every other statement kind carries no constants of interest.
        return []

    def _extract_from_assign(self, node: ast.Assign) -> list[ConstantInfo]:
        """Return one entry per qualifying target of a plain assignment."""
        found: list[ConstantInfo] = []
        for target in node.targets:
            info = self._to_const_info(target, node.value, node.lineno)
            if info:
                found.append(info)
        return found

    def _extract_from_ann_assign(self, node: ast.AnnAssign) -> list[ConstantInfo]:
        """Return the constant defined by an annotated assignment, if any."""
        assigned = node.value
        # A bare annotation ("NAME: int") declares a name without a value.
        if assigned is None:
            return []
        info = self._to_const_info(node.target, assigned, node.lineno)
        if not info:
            return []
        return [info]

    def _to_const_info(self, target: ast.expr, value: ast.expr, lineno: int) -> ConstantInfo | None:
        """Build a ConstantInfo when ``target`` is a simple name accepted as a constant.

        Returns None for non-name targets (attributes, subscripts, tuples) and
        for names rejected by ``_is_constant_name``.
        """
        if isinstance(target, ast.Name) and _is_constant_name(target.id):
            return ConstantInfo(
                name=target.id, line_number=lineno, value=_get_value_string(value)
            )
        return None
72
+
73
+
74
+ def _is_constant_name(name: str) -> bool:
75
+ """Check if name matches constant naming convention."""
76
+ return not name.startswith("_") and bool(CONSTANT_NAME_PATTERN.match(name))
77
+
78
+
79
+ def _get_value_string(value: ast.expr) -> str | None:
80
+ """Get string representation of a value expression."""
81
+ if isinstance(value, (ast.Constant, ast.Num, ast.Str)):
82
+ return _literal_repr(value)
83
+ if isinstance(value, ast.Name):
84
+ return value.id
85
+ if isinstance(value, ast.Call):
86
+ return _call_to_string(value)
87
+ return CONTAINER_REPRESENTATIONS.get(type(value))
88
+
89
+
90
+ def _literal_repr(node: ast.expr) -> str:
91
+ """Get repr of a literal node."""
92
+ if isinstance(node, ast.Constant):
93
+ return repr(node.value)
94
+ return repr(getattr(node, "n", None) or getattr(node, "s", None))
95
+
96
+
97
+ def _call_to_string(node: ast.Call) -> str:
98
+ """Convert call expression to string."""
99
+ if isinstance(node.func, ast.Name):
100
+ return f"{node.func.id}(...)"
101
+ return "call(...)"