thailint 0.5.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +38 -25
- src/core/base.py +7 -2
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +5 -2
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +120 -20
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +104 -10
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +54 -11
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +5 -4
- src/linters/dry/file_analyzer.py +4 -2
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +183 -48
- src/linters/dry/python_analyzer.py +60 -439
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/token_hasher.py +116 -112
- src/linters/dry/typescript_analyzer.py +68 -382
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +5 -4
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/atemporal_detector.py +68 -50
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +90 -16
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +36 -33
- src/linters/file_header/linter.py +140 -144
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +14 -58
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +13 -12
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +66 -34
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/context_analyzer.py +227 -225
- src/linters/magic_numbers/linter.py +28 -82
- src/linters/magic_numbers/python_analyzer.py +4 -16
- src/linters/magic_numbers/typescript_analyzer.py +9 -12
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/linter.py +24 -16
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -12
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/config.py +7 -12
- src/linters/print_statements/linter.py +26 -43
- src/linters/print_statements/python_analyzer.py +91 -93
- src/linters/print_statements/typescript_analyzer.py +15 -25
- src/linters/print_statements/violation_builder.py +12 -14
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +15 -16
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +252 -14
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +196 -0
- src/utils/project_root.py +3 -0
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1665
- thailint-0.5.0.dist-info/METADATA +0 -1286
- thailint-0.5.0.dist-info/RECORD +0 -96
- thailint-0.5.0.dist-info/entry_points.txt +0 -4
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +0 -0
- {thailint-0.5.0.dist-info → thailint-0.15.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Dataclasses for duplicate constants detection in DRY linter
|
|
3
|
+
|
|
4
|
+
Scope: Data structures for constant extraction and cross-file detection
|
|
5
|
+
|
|
6
|
+
Overview: Provides dataclasses for representing constants extracted from source code and their
|
|
7
|
+
locations across multiple files. ConstantInfo stores extracted constant metadata (name, line,
|
|
8
|
+
value) from a single file. ConstantLocation represents where a constant appears across the
|
|
9
|
+
project. ConstantGroup represents a group of related constants (exact or fuzzy matches) for
|
|
10
|
+
violation reporting. These structures support the duplicate constants detection feature that
|
|
11
|
+
identifies when the same constant name appears in multiple files.
|
|
12
|
+
|
|
13
|
+
Dependencies: Python dataclasses module, pathlib for Path types
|
|
14
|
+
|
|
15
|
+
Exports: ConstantInfo, ConstantLocation, ConstantGroup dataclasses
|
|
16
|
+
|
|
17
|
+
Interfaces: Dataclass constructors with named fields
|
|
18
|
+
|
|
19
|
+
Implementation: Plain (mutable) dataclasses with optional fields for extracted value context
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# Regex accepted as an ALL_CAPS constant name by both the Python and the
# TypeScript constant extractors.
# - The first character must be an uppercase letter, so names with a leading
#   underscore (non-public) are rejected.
# - At least one more character is required, which rules out single-letter
#   type parameters such as P, T, K, V.
CONSTANT_NAME_PATTERN = re.compile(r"^[A-Z][A-Z0-9_]+$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
|
|
33
|
+
class ConstantInfo:
|
|
34
|
+
"""Information about a constant extracted from source code.
|
|
35
|
+
|
|
36
|
+
Represents a single constant definition found during file analysis.
|
|
37
|
+
Used during the collection phase before cross-file matching.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
name: str # Constant name (e.g., "API_TIMEOUT")
|
|
41
|
+
line_number: int # Line where constant is defined
|
|
42
|
+
value: str | None = None # Optional: the value (for violation message context)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
|
|
46
|
+
class ConstantLocation:
|
|
47
|
+
"""Location of a constant in the project.
|
|
48
|
+
|
|
49
|
+
Represents where a specific constant appears, including file path,
|
|
50
|
+
line number, and the value assigned. Used for cross-file reporting.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
file_path: Path
|
|
54
|
+
line_number: int
|
|
55
|
+
name: str
|
|
56
|
+
value: str | None = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ConstantGroup:
    """Constants that belong together, collected for violation reporting.

    A group holds every location of constants that were matched with each
    other (exactly or through fuzzy matching) across files.

    Attributes:
        canonical_name: Representative name of the group.
        locations: Every location where a member of the group appears.
        all_names: Distinct names seen in the group (several for fuzzy groups).
        is_fuzzy_match: True for a fuzzy group, False for an exact-name group.
    """

    canonical_name: str
    locations: list[ConstantLocation] = field(default_factory=list)
    all_names: set[str] = field(default_factory=set)
    is_fuzzy_match: bool = False

    def add_location(self, location: ConstantLocation) -> None:
        """Record a location and remember the name it uses.

        Args:
            location: The constant location to add
        """
        self.locations.append(location)
        self.all_names.add(location.name)

    @property
    def file_count(self) -> int:
        """Number of unique files containing this constant."""
        distinct_paths = set()
        distinct_paths.update(entry.file_path for entry in self.locations)
        return len(distinct_paths)
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Fuzzy matching for constant names across files
|
|
3
|
+
|
|
4
|
+
Scope: Constant name matching with word-set and edit distance algorithms
|
|
5
|
+
|
|
6
|
+
Overview: Implements fuzzy matching strategies to identify related constants across files. Uses
|
|
7
|
+
two matching strategies: word-set matching (same words in different order, e.g., API_TIMEOUT
|
|
8
|
+
and TIMEOUT_API) and edit distance matching (typos within Levenshtein distance <= 2, e.g.,
|
|
9
|
+
MAX_RETRYS and MAX_RETRIES). Single-word constants (e.g., MAX, TIMEOUT) only use exact
|
|
10
|
+
matching to avoid false positives. Groups related constants into ConstantGroup instances
|
|
11
|
+
for violation reporting.
|
|
12
|
+
|
|
13
|
+
Dependencies: ConstantInfo, ConstantLocation, ConstantGroup from constant module
|
|
14
|
+
|
|
15
|
+
Exports: find_constant_groups function
|
|
16
|
+
|
|
17
|
+
Interfaces: find_constant_groups(constants) -> list[ConstantGroup]
|
|
18
|
+
|
|
19
|
+
Implementation: Union-Find algorithm for grouping, word-set hashing, Levenshtein distance calculation
|
|
20
|
+
|
|
21
|
+
Suppressions:
|
|
22
|
+
- arguments-out-of-order: _levenshtein_distance deliberately swaps its arguments in the recursive call so the longer string always comes first
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from collections.abc import Callable
|
|
26
|
+
from itertools import combinations
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
|
|
29
|
+
from .constant import ConstantGroup, ConstantInfo, ConstantLocation
|
|
30
|
+
|
|
31
|
+
# Largest Levenshtein distance at which two names still count as a fuzzy match
MAX_EDIT_DISTANCE = 2

# Word pairs with opposite meanings that must never be fuzzy-matched.
# When one name contains one word of a pair and the other name contains its
# counterpart, the names describe different concepts and stay in separate groups.
ANTONYM_PAIRS = frozenset(
    frozenset(pair)
    for pair in (
        ("max", "min"),
        ("start", "end"),
        ("first", "last"),
        ("before", "after"),
        ("open", "close"),
        ("read", "write"),
        ("get", "set"),
        ("push", "pop"),
        ("add", "remove"),
        ("create", "delete"),
        ("enable", "disable"),
        ("show", "hide"),
        ("up", "down"),
        ("left", "right"),
        ("top", "bottom"),
        ("prev", "next"),
        ("success", "failure"),
        ("true", "false"),
        ("on", "off"),
        ("in", "out"),
    )
)

# Shortest accepted constant name (excludes single-letter type params like P, T, K, V)
MIN_CONSTANT_NAME_LENGTH = 2
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class UnionFind:
    """Union-Find (disjoint-set) data structure for grouping string items.

    Every item starts in its own set. ``union`` merges two sets and ``find``
    returns the representative (root) of the set an item belongs to.
    """

    def __init__(self, items: list[str]) -> None:
        """Initialize with list of items, each forming its own singleton set."""
        self._parent = {item: item for item in items}

    def find(self, x: str) -> str:
        """Find root with path compression.

        Implemented iteratively so that a long parent chain cannot exceed the
        interpreter's recursion limit.

        Args:
            x: Item to look up; must be one of the items given at construction.

        Returns:
            The root item of the set containing ``x``.

        Raises:
            KeyError: If ``x`` was not part of the initial items.
        """
        parent = self._parent

        # Pass 1: walk up to the root.
        root = x
        while parent[root] != root:
            root = parent[root]

        # Pass 2: point every node on the walked path directly at the root.
        node = x
        while node != root:
            next_node = parent[node]
            parent[node] = root
            node = next_node
        return root

    def union(self, x: str, y: str) -> None:
        """Merge two sets.

        The root of ``x``'s set is attached beneath the root of ``y``'s set,
        so ``y``'s root becomes the representative of the merged set.
        """
        px, py = self.find(x), self.find(y)
        if px != py:
            self._parent[px] = py
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def find_constant_groups(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantGroup]:
    """Group constants that are related by name.

    Constants are first grouped by identical name, then groups whose names
    fuzzy-match each other are merged.

    Args:
        constants: List of (file_path, ConstantInfo) tuples

    Returns:
        List of ConstantGroup instances representing related constants;
        empty when no constants are given
    """
    if constants:
        groups_by_name = _group_by_exact_name(_build_locations(constants))
        return _merge_fuzzy_groups(groups_by_name)
    return []
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _merge_fuzzy_groups(groups: dict[str, ConstantGroup]) -> list[ConstantGroup]:
    """Combine exact-name groups whose names fuzzy-match each other."""
    group_names = list(groups)
    disjoint_sets = UnionFind(group_names)
    _union_matching_pairs(group_names, disjoint_sets, _is_fuzzy_match)
    return _build_merged_groups(group_names, groups, disjoint_sets)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _is_fuzzy_match(name1: str, name2: str) -> bool:
    """Return True when two constant names are identical or fuzzy similar."""
    # Identical names short-circuit; otherwise defer to the fuzzy comparison.
    return name1 == name2 or _is_fuzzy_similar(name1, name2)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _build_locations(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantLocation]:
    """Convert (file_path, ConstantInfo) pairs into ConstantLocation records."""
    return [
        ConstantLocation(
            file_path=source_file,
            line_number=constant.line_number,
            name=constant.name,
            value=constant.value,
        )
        for source_file, constant in constants
    ]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _group_by_exact_name(locations: list[ConstantLocation]) -> dict[str, ConstantGroup]:
    """Bucket locations into one non-fuzzy ConstantGroup per distinct name."""
    by_name: dict[str, ConstantGroup] = {}
    for location in locations:
        group = by_name.get(location.name)
        if group is None:
            # First occurrence of this name: start an empty exact-match group.
            group = ConstantGroup(
                canonical_name=location.name, locations=[], all_names=set(), is_fuzzy_match=False
            )
            by_name[location.name] = group
        group.add_location(location)
    return by_name
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _union_matching_pairs(
    names: list[str], uf: UnionFind, is_match: Callable[[str, str], bool]
) -> None:
    """Union every unordered pair of names for which ``is_match`` is true."""
    for left, right in combinations(names, 2):
        if not is_match(left, right):
            continue
        uf.union(left, right)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _build_merged_groups(
    names: list[str], groups: dict[str, ConstantGroup], uf: UnionFind
) -> list[ConstantGroup]:
    """Collapse exact-name groups into one group per union-find root.

    The root name becomes the canonical name of the merged group. A merged
    group is flagged as fuzzy as soon as it absorbs a name other than its root.
    """
    by_root: dict[str, ConstantGroup] = {}
    for name in names:
        root = uf.find(name)
        target = by_root.get(root)
        if target is None:
            target = ConstantGroup(
                canonical_name=root, locations=[], all_names=set(), is_fuzzy_match=False
            )
            by_root[root] = target
        for location in groups[name].locations:
            target.add_location(location)
        if root != name:
            target.is_fuzzy_match = True
    return list(by_root.values())
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _get_words(name: str) -> list[str]:
    """Return the lowercase, non-empty underscore-separated parts of a name."""
    non_empty_parts = filter(None, name.split("_"))
    return [part.lower() for part in non_empty_parts]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _is_fuzzy_similar(name1: str, name2: str) -> bool:
    """Decide whether two names are fuzzy similar.

    Names qualify only when both consist of at least two words and they do
    not contain opposing antonyms. They are then similar if they share the
    same word set or are within the edit distance threshold.
    """
    first_words = _get_words(name1)
    second_words = _get_words(name2)
    comparable = _has_enough_words(first_words, second_words) and not _has_antonym_conflict(
        set(first_words), set(second_words)
    )
    if not comparable:
        return False
    if _word_set_match(first_words, second_words):
        return True
    return _edit_distance_match(name1, name2)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _has_enough_words(words1: list[str], words2: list[str]) -> bool:
    """Return True only when each word list holds at least 2 words."""
    shortest = min(len(words1), len(words2))
    return shortest >= 2
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _word_set_match(words1: list[str], words2: list[str]) -> bool:
    """Return True when both lists hold the same words, ignoring order and repeats."""
    only_in_one = set(words1).symmetric_difference(words2)
    return not only_in_one
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _has_antonym_conflict(set1: set[str], set2: set[str]) -> bool:
    """Return True if any antonym pair is split across the two word sets (e.g., MAX vs MIN)."""
    for antonyms in ANTONYM_PAIRS:
        if _is_antonym_split(antonyms, set1, set2):
            return True
    return False
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _is_antonym_split(pair: frozenset[str], set1: set[str], set2: set[str]) -> bool:
    """Return True when one set holds one word of the pair and the other set holds its opposite."""
    members = tuple(pair)
    first, second = members[0], members[1]
    if first in set1 and second in set2:
        return True
    # Same check with the roles of the two words exchanged.
    return second in set1 and first in set2
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _edit_distance_match(name1: str, name2: str) -> bool:
    """Return True when the lowercased names are within MAX_EDIT_DISTANCE edits of each other."""
    distance = _levenshtein_distance(name1.lower(), name2.lower())
    return distance <= MAX_EDIT_DISTANCE
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _levenshtein_distance(s1: str, s2: str) -> int:
    """Compute the Levenshtein (edit) distance between two strings.

    Uses the row-by-row dynamic programming formulation, keeping only the
    previous row. The longer string is always iterated in the outer loop.
    """
    if len(s2) > len(s1):
        # Normalise so that s1 is the longer string.
        return _levenshtein_distance(s2, s1)  # pylint: disable=arguments-out-of-order
    if not s2:
        return len(s1)

    prev_costs = list(range(len(s2) + 1))
    for row, long_char in enumerate(s1, start=1):
        costs = [row]
        for col, short_char in enumerate(s2):
            costs.append(
                min(
                    prev_costs[col + 1] + 1,
                    costs[col] + 1,
                    prev_costs[col] + (long_char != short_char),
                )
            )
        prev_costs = costs
    return prev_costs[-1]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Build violation messages for duplicate constants
|
|
3
|
+
|
|
4
|
+
Scope: Violation message formatting for constant duplication detection
|
|
5
|
+
|
|
6
|
+
Overview: Formats detailed violation messages for duplicate constant detection. Creates messages
|
|
7
|
+
that include the constant name(s), all file locations with line numbers, and the values
|
|
8
|
+
assigned at each location. Distinguishes between exact matches (same constant name) and
|
|
9
|
+
fuzzy matches (similar names like API_TIMEOUT and TIMEOUT_API). Provides actionable guidance
|
|
10
|
+
to consolidate constants into a shared module.
|
|
11
|
+
|
|
12
|
+
Dependencies: ConstantGroup from constant module, Violation from core.types
|
|
13
|
+
|
|
14
|
+
Exports: ConstantViolationBuilder class
|
|
15
|
+
|
|
16
|
+
Interfaces: ConstantViolationBuilder.build_violations(groups, rule_id) -> list[Violation]
|
|
17
|
+
|
|
18
|
+
Implementation: Message template formatting with location enumeration and fuzzy match indication
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from src.core.types import Severity, Violation
|
|
22
|
+
|
|
23
|
+
from .constant import ConstantGroup, ConstantLocation
|
|
24
|
+
|
|
25
|
+
# Upper bound on how many other locations are listed in a violation message
MAX_DISPLAYED_LOCATIONS = 3
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ConstantViolationBuilder:
    """Builds violation messages for duplicate constants.

    Attributes:
        min_occurrences: Minimum number of distinct files a group must span
            before violations are produced for it.
    """

    def __init__(self, min_occurrences: int = 2) -> None:
        """Initialize with minimum occurrence threshold."""
        self.min_occurrences = min_occurrences

    def build_violations(self, groups: list[ConstantGroup], rule_id: str) -> list[Violation]:
        """Build violations from constant groups.

        Args:
            groups: Constant groups to report on
            rule_id: Rule identifier attached to every produced violation

        Returns:
            One violation per location of every group whose file count
            reaches ``min_occurrences``
        """
        violations: list[Violation] = []
        for group in groups:
            if group.file_count >= self.min_occurrences:
                violations.extend(self._violations_for_group(group, rule_id))
        return violations

    def _violations_for_group(self, group: ConstantGroup, rule_id: str) -> list[Violation]:
        """Create violations for all locations in a group."""
        return [
            Violation(
                rule_id=rule_id,
                file_path=str(loc.file_path),
                line=loc.line_number,
                column=1,
                message=self._format_message(group, loc),
                severity=Severity.ERROR,
            )
            for loc in group.locations
        ]

    def _format_message(self, group: ConstantGroup, current: ConstantLocation) -> str:
        """Format the violation message based on match type.

        The message has three parts: a headline, the list of other locations,
        and advice. The location part is omitted entirely when there are no
        other locations, so the message never contains a doubled space.
        """
        others = _get_other_locations(group, current)
        locations_text = _format_locations_text(others)
        if group.is_fuzzy_match:
            names_str = " ≈ ".join(f"'{n}'" for n in sorted(group.all_names))
            headline = f"Similar constants found: {names_str} in {group.file_count} files."
            advice = (
                "These appear to represent the same concept - consider standardizing the name."
            )
        else:
            headline = (
                f"Duplicate constant '{group.canonical_name}' defined in {group.file_count} files."
            )
            advice = "Consider consolidating to a shared constants module."
        # Skip the empty location text instead of leaving a gap between sentences.
        return " ".join(part for part in (headline, locations_text, advice) if part)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _get_other_locations(group: ConstantGroup, current: ConstantLocation) -> list[ConstantLocation]:
    """Return the group's locations that differ from ``current`` in file or line (module-level helper)."""
    current_key = (current.file_path, current.line_number)
    return [
        candidate
        for candidate in group.locations
        if (candidate.file_path, candidate.line_number) != current_key
    ]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _format_locations_text(others: list[ConstantLocation]) -> str:
    """Render the other locations as an "Also found in: ..." sentence (module-level helper).

    At most MAX_DISPLAYED_LOCATIONS entries are listed; any remainder is
    summarised as "and N more". An empty input yields an empty string.
    """
    if not others:
        return ""
    shown = ", ".join(_format_single_location(loc) for loc in others[:MAX_DISPLAYED_LOCATIONS])
    hidden_count = len(others) - MAX_DISPLAYED_LOCATIONS
    ending = f" and {hidden_count} more." if hidden_count > 0 else "."
    return f"Also found in: {shown}{ending}"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _format_single_location(loc: ConstantLocation) -> str:
    """Render one location as "file:line (NAME = value)" (module-level helper).

    The " = value" part is included only when the location has a truthy value.
    """
    label = loc.name
    if loc.value:
        label = f"{label} = {loc.value}"
    return f"{loc.file_path.name}:{loc.line_number} ({label})"
|
|
@@ -11,7 +11,7 @@ Dependencies: DRYCache, CodeBlock, Path
|
|
|
11
11
|
|
|
12
12
|
Exports: DuplicateStorage class
|
|
13
13
|
|
|
14
|
-
Interfaces: DuplicateStorage.add_blocks(file_path, blocks),
|
|
14
|
+
Interfaces: DuplicateStorage.add_blocks(file_path, blocks), duplicate_hashes property,
|
|
15
15
|
get_blocks_for_hash(hash_value)
|
|
16
16
|
|
|
17
17
|
Implementation: Delegates to SQLite cache for all storage operations
|
|
@@ -43,13 +43,14 @@ class DuplicateStorage:
|
|
|
43
43
|
if blocks:
|
|
44
44
|
self._cache.add_blocks(file_path, blocks)
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
@property
|
|
47
|
+
def duplicate_hashes(self) -> list[int]:
|
|
48
|
+
"""Hash values with 2+ occurrences from SQLite.
|
|
48
49
|
|
|
49
50
|
Returns:
|
|
50
51
|
List of hash values that appear in multiple blocks
|
|
51
52
|
"""
|
|
52
|
-
return self._cache.
|
|
53
|
+
return self._cache.duplicate_hashes
|
|
53
54
|
|
|
54
55
|
def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
|
|
55
56
|
"""Get all blocks with given hash value from SQLite.
|
src/linters/dry/file_analyzer.py
CHANGED
|
@@ -18,6 +18,8 @@ Implementation: Delegates to language-specific analyzers, always performs fresh
|
|
|
18
18
|
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
|
|
21
|
+
from src.core.constants import Language
|
|
22
|
+
|
|
21
23
|
from .block_filter import BlockFilterRegistry, create_default_registry
|
|
22
24
|
from .cache import CodeBlock
|
|
23
25
|
from .config import DRYConfig
|
|
@@ -83,8 +85,8 @@ class FileAnalyzer:
|
|
|
83
85
|
List of CodeBlock instances
|
|
84
86
|
"""
|
|
85
87
|
# Analyze file based on language
|
|
86
|
-
if language ==
|
|
88
|
+
if language == Language.PYTHON:
|
|
87
89
|
return self._python_analyzer.analyze(file_path, content, config)
|
|
88
|
-
if language in (
|
|
90
|
+
if language in (Language.TYPESCRIPT, Language.JAVASCRIPT):
|
|
89
91
|
return self._typescript_analyzer.analyze(file_path, content, config)
|
|
90
92
|
return []
|
src/linters/dry/inline_ignore.py
CHANGED
|
@@ -50,14 +50,11 @@ class InlineIgnoreParser:
|
|
|
50
50
|
Returns:
|
|
51
51
|
List of (start, end) tuples for ignore ranges
|
|
52
52
|
"""
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
ignore_range
|
|
57
|
-
|
|
58
|
-
ranges.append(ignore_range)
|
|
59
|
-
|
|
60
|
-
return ranges
|
|
53
|
+
return [
|
|
54
|
+
ignore_range
|
|
55
|
+
for i, line in enumerate(lines, start=1)
|
|
56
|
+
if (ignore_range := self._parse_ignore_directive(line, i, len(lines)))
|
|
57
|
+
]
|
|
61
58
|
|
|
62
59
|
def _parse_ignore_directive(
|
|
63
60
|
self, line: str, line_num: int, total_lines: int
|
|
@@ -115,10 +112,7 @@ class InlineIgnoreParser:
|
|
|
115
112
|
Returns:
|
|
116
113
|
True if ranges overlap
|
|
117
114
|
"""
|
|
118
|
-
for ign_start, ign_end in ranges
|
|
119
|
-
if line <= ign_end and end_line >= ign_start:
|
|
120
|
-
return True
|
|
121
|
-
return False
|
|
115
|
+
return any(line <= ign_end and end_line >= ign_start for ign_start, ign_end in ranges)
|
|
122
116
|
|
|
123
117
|
def _check_single_line(self, line: int, ranges: list[tuple[int, int]]) -> bool:
|
|
124
118
|
"""Check if single line is in any ignore range.
|
|
@@ -130,10 +124,7 @@ class InlineIgnoreParser:
|
|
|
130
124
|
Returns:
|
|
131
125
|
True if line is in any range
|
|
132
126
|
"""
|
|
133
|
-
for start, end in ranges
|
|
134
|
-
if start <= line <= end:
|
|
135
|
-
return True
|
|
136
|
-
return False
|
|
127
|
+
return any(start <= line <= end for start, end in ranges)
|
|
137
128
|
|
|
138
129
|
def clear(self) -> None:
|
|
139
130
|
"""Clear all stored ignore ranges."""
|