thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/analyzers/__init__.py +4 -3
- src/analyzers/ast_utils.py +54 -0
- src/analyzers/rust_base.py +155 -0
- src/analyzers/rust_context.py +141 -0
- src/analyzers/typescript_base.py +4 -0
- src/cli/__init__.py +30 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +480 -0
- src/cli/config_merge.py +241 -0
- src/cli/linters/__init__.py +67 -0
- src/cli/linters/code_patterns.py +270 -0
- src/cli/linters/code_smells.py +342 -0
- src/cli/linters/documentation.py +83 -0
- src/cli/linters/performance.py +287 -0
- src/cli/linters/shared.py +331 -0
- src/cli/linters/structure.py +327 -0
- src/cli/linters/structure_quality.py +328 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +37 -0
- src/config.py +44 -27
- src/core/base.py +95 -5
- src/core/cli_utils.py +19 -2
- src/core/config_parser.py +36 -6
- src/core/constants.py +54 -0
- src/core/linter_utils.py +95 -6
- src/core/python_lint_rule.py +101 -0
- src/core/registry.py +1 -1
- src/core/rule_discovery.py +147 -84
- src/core/types.py +13 -0
- src/core/violation_builder.py +78 -15
- src/core/violation_utils.py +69 -0
- src/formatters/__init__.py +22 -0
- src/formatters/sarif.py +202 -0
- src/linter_config/directive_markers.py +109 -0
- src/linter_config/ignore.py +254 -395
- src/linter_config/loader.py +45 -12
- src/linter_config/pattern_utils.py +65 -0
- src/linter_config/rule_matcher.py +89 -0
- src/linters/collection_pipeline/__init__.py +90 -0
- src/linters/collection_pipeline/any_all_analyzer.py +281 -0
- src/linters/collection_pipeline/ast_utils.py +40 -0
- src/linters/collection_pipeline/config.py +75 -0
- src/linters/collection_pipeline/continue_analyzer.py +94 -0
- src/linters/collection_pipeline/detector.py +360 -0
- src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
- src/linters/collection_pipeline/linter.py +420 -0
- src/linters/collection_pipeline/suggestion_builder.py +130 -0
- src/linters/cqs/__init__.py +54 -0
- src/linters/cqs/config.py +55 -0
- src/linters/cqs/function_analyzer.py +201 -0
- src/linters/cqs/input_detector.py +139 -0
- src/linters/cqs/linter.py +159 -0
- src/linters/cqs/output_detector.py +84 -0
- src/linters/cqs/python_analyzer.py +54 -0
- src/linters/cqs/types.py +82 -0
- src/linters/cqs/typescript_cqs_analyzer.py +61 -0
- src/linters/cqs/typescript_function_analyzer.py +192 -0
- src/linters/cqs/typescript_input_detector.py +203 -0
- src/linters/cqs/typescript_output_detector.py +117 -0
- src/linters/cqs/violation_builder.py +94 -0
- src/linters/dry/base_token_analyzer.py +16 -9
- src/linters/dry/block_filter.py +125 -22
- src/linters/dry/block_grouper.py +4 -0
- src/linters/dry/cache.py +142 -94
- src/linters/dry/cache_query.py +4 -0
- src/linters/dry/config.py +68 -21
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +223 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/duplicate_storage.py +20 -82
- src/linters/dry/file_analyzer.py +15 -50
- src/linters/dry/inline_ignore.py +7 -16
- src/linters/dry/linter.py +182 -54
- src/linters/dry/python_analyzer.py +108 -336
- src/linters/dry/python_constant_extractor.py +100 -0
- src/linters/dry/single_statement_detector.py +417 -0
- src/linters/dry/storage_initializer.py +9 -18
- src/linters/dry/token_hasher.py +129 -71
- src/linters/dry/typescript_analyzer.py +68 -380
- src/linters/dry/typescript_constant_extractor.py +138 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +70 -0
- src/linters/dry/violation_builder.py +4 -0
- src/linters/dry/violation_filter.py +9 -5
- src/linters/dry/violation_generator.py +71 -14
- src/linters/file_header/__init__.py +24 -0
- src/linters/file_header/atemporal_detector.py +105 -0
- src/linters/file_header/base_parser.py +93 -0
- src/linters/file_header/bash_parser.py +66 -0
- src/linters/file_header/config.py +140 -0
- src/linters/file_header/css_parser.py +70 -0
- src/linters/file_header/field_validator.py +72 -0
- src/linters/file_header/linter.py +309 -0
- src/linters/file_header/markdown_parser.py +130 -0
- src/linters/file_header/python_parser.py +42 -0
- src/linters/file_header/typescript_parser.py +73 -0
- src/linters/file_header/violation_builder.py +79 -0
- src/linters/file_placement/config_loader.py +3 -1
- src/linters/file_placement/directory_matcher.py +4 -0
- src/linters/file_placement/linter.py +74 -31
- src/linters/file_placement/pattern_matcher.py +41 -6
- src/linters/file_placement/pattern_validator.py +31 -12
- src/linters/file_placement/rule_checker.py +12 -7
- src/linters/lazy_ignores/__init__.py +43 -0
- src/linters/lazy_ignores/config.py +74 -0
- src/linters/lazy_ignores/directive_utils.py +164 -0
- src/linters/lazy_ignores/header_parser.py +177 -0
- src/linters/lazy_ignores/linter.py +158 -0
- src/linters/lazy_ignores/matcher.py +168 -0
- src/linters/lazy_ignores/python_analyzer.py +209 -0
- src/linters/lazy_ignores/rule_id_utils.py +180 -0
- src/linters/lazy_ignores/skip_detector.py +298 -0
- src/linters/lazy_ignores/types.py +71 -0
- src/linters/lazy_ignores/typescript_analyzer.py +146 -0
- src/linters/lazy_ignores/violation_builder.py +135 -0
- src/linters/lbyl/__init__.py +31 -0
- src/linters/lbyl/config.py +63 -0
- src/linters/lbyl/linter.py +67 -0
- src/linters/lbyl/pattern_detectors/__init__.py +53 -0
- src/linters/lbyl/pattern_detectors/base.py +63 -0
- src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
- src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
- src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
- src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
- src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
- src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
- src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
- src/linters/lbyl/python_analyzer.py +215 -0
- src/linters/lbyl/violation_builder.py +354 -0
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +82 -0
- src/linters/magic_numbers/context_analyzer.py +249 -0
- src/linters/magic_numbers/linter.py +462 -0
- src/linters/magic_numbers/python_analyzer.py +64 -0
- src/linters/magic_numbers/typescript_analyzer.py +215 -0
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/method_property/__init__.py +49 -0
- src/linters/method_property/config.py +138 -0
- src/linters/method_property/linter.py +414 -0
- src/linters/method_property/python_analyzer.py +473 -0
- src/linters/method_property/violation_builder.py +119 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +6 -3
- src/linters/nesting/linter.py +31 -34
- src/linters/nesting/python_analyzer.py +4 -0
- src/linters/nesting/typescript_analyzer.py +6 -11
- src/linters/nesting/violation_builder.py +1 -0
- src/linters/performance/__init__.py +91 -0
- src/linters/performance/config.py +43 -0
- src/linters/performance/constants.py +49 -0
- src/linters/performance/linter.py +149 -0
- src/linters/performance/python_analyzer.py +365 -0
- src/linters/performance/regex_analyzer.py +312 -0
- src/linters/performance/regex_linter.py +139 -0
- src/linters/performance/typescript_analyzer.py +236 -0
- src/linters/performance/violation_builder.py +160 -0
- src/linters/print_statements/__init__.py +53 -0
- src/linters/print_statements/config.py +78 -0
- src/linters/print_statements/linter.py +413 -0
- src/linters/print_statements/python_analyzer.py +153 -0
- src/linters/print_statements/typescript_analyzer.py +125 -0
- src/linters/print_statements/violation_builder.py +96 -0
- src/linters/srp/__init__.py +3 -3
- src/linters/srp/class_analyzer.py +11 -7
- src/linters/srp/config.py +12 -6
- src/linters/srp/heuristics.py +56 -22
- src/linters/srp/linter.py +47 -39
- src/linters/srp/python_analyzer.py +55 -20
- src/linters/srp/typescript_metrics_calculator.py +110 -50
- src/linters/stateless_class/__init__.py +25 -0
- src/linters/stateless_class/config.py +58 -0
- src/linters/stateless_class/linter.py +349 -0
- src/linters/stateless_class/python_analyzer.py +290 -0
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +189 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +135 -0
- src/linters/stringly_typed/ignore_checker.py +100 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +376 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +348 -0
- src/linters/stringly_typed/python/call_tracker.py +175 -0
- src/linters/stringly_typed/python/comparison_tracker.py +257 -0
- src/linters/stringly_typed/python/condition_extractor.py +134 -0
- src/linters/stringly_typed/python/conditional_detector.py +179 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +94 -0
- src/linters/stringly_typed/python/validation_detector.py +189 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +620 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +335 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
- src/linters/stringly_typed/violation_generator.py +419 -0
- src/orchestrator/core.py +264 -16
- src/orchestrator/language_detector.py +5 -3
- src/templates/thailint_config_template.yaml +354 -0
- src/utils/project_root.py +138 -16
- thailint-0.15.3.dist-info/METADATA +187 -0
- thailint-0.15.3.dist-info/RECORD +226 -0
- {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
- thailint-0.15.3.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -1055
- thailint-0.2.0.dist-info/METADATA +0 -980
- thailint-0.2.0.dist-info/RECORD +0 -75
- thailint-0.2.0.dist-info/entry_points.txt +0 -4
- {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
src/linters/dry/constant_matcher.py ADDED
@@ -0,0 +1,223 @@
+"""
+Purpose: Fuzzy matching for constant names across files
+
+Scope: Constant name matching with word-set and edit distance algorithms
+
+Overview: Implements fuzzy matching strategies to identify related constants across files. Uses
+    two matching strategies: word-set matching (same words in different order, e.g., API_TIMEOUT
+    and TIMEOUT_API) and edit distance matching (typos within Levenshtein distance <= 2, e.g.,
+    MAX_RETRYS and MAX_RETRIES). Single-word constants (e.g., MAX, TIMEOUT) only use exact
+    matching to avoid false positives. Groups related constants into ConstantGroup instances
+    for violation reporting.
+
+Dependencies: ConstantInfo, ConstantLocation, ConstantGroup from constant module
+
+Exports: find_constant_groups function
+
+Interfaces: find_constant_groups(constants) -> list[ConstantGroup]
+
+Implementation: Union-Find algorithm for grouping, word-set hashing, Levenshtein distance calculation
+
+Suppressions:
+    - arguments-out-of-order: Named arguments used for clarity in ConstantLocation
+"""
+
+from collections.abc import Callable
+from itertools import combinations
+from pathlib import Path
+
+from .constant import ConstantGroup, ConstantInfo, ConstantLocation
+
+# Maximum edit distance for fuzzy matching
+MAX_EDIT_DISTANCE = 2
+
+# Antonym pairs that should not be fuzzy-matched
+# If one name contains a word from the left side and the other contains the right side,
+# they represent different concepts and should not be grouped together
+ANTONYM_PAIRS = frozenset(
+    (
+        frozenset(("max", "min")),
+        frozenset(("start", "end")),
+        frozenset(("first", "last")),
+        frozenset(("before", "after")),
+        frozenset(("open", "close")),
+        frozenset(("read", "write")),
+        frozenset(("get", "set")),
+        frozenset(("push", "pop")),
+        frozenset(("add", "remove")),
+        frozenset(("create", "delete")),
+        frozenset(("enable", "disable")),
+        frozenset(("show", "hide")),
+        frozenset(("up", "down")),
+        frozenset(("left", "right")),
+        frozenset(("top", "bottom")),
+        frozenset(("prev", "next")),
+        frozenset(("success", "failure")),
+        frozenset(("true", "false")),
+        frozenset(("on", "off")),
+        frozenset(("in", "out")),
+    )
+)
+
+# Minimum length for constant names (exclude single-letter type params like P, T, K, V)
+MIN_CONSTANT_NAME_LENGTH = 2
+
+
+class UnionFind:
+    """Union-Find data structure for grouping."""
+
+    def __init__(self, items: list[str]) -> None:
+        """Initialize with list of items."""
+        self._parent = {item: item for item in items}
+
+    def find(self, x: str) -> str:
+        """Find root with path compression."""
+        if self._parent[x] != x:
+            self._parent[x] = self.find(self._parent[x])
+        return self._parent[x]
+
+    def union(self, x: str, y: str) -> None:
+        """Merge two sets."""
+        px, py = self.find(x), self.find(y)
+        if px != py:
+            self._parent[px] = py
+
+
+def find_constant_groups(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantGroup]:
+    """Find groups of related constants.
+
+    Args:
+        constants: List of (file_path, ConstantInfo) tuples
+
+    Returns:
+        List of ConstantGroup instances representing related constants
+    """
+    if not constants:
+        return []
+    locations = _build_locations(constants)
+    exact_groups = _group_by_exact_name(locations)
+    return _merge_fuzzy_groups(exact_groups)
+
+
+def _merge_fuzzy_groups(groups: dict[str, ConstantGroup]) -> list[ConstantGroup]:
+    """Merge groups that match via fuzzy matching."""
+    names = list(groups.keys())
+    uf = UnionFind(names)
+    _union_matching_pairs(names, uf, _is_fuzzy_match)
+    return _build_merged_groups(names, groups, uf)
+
+
+def _is_fuzzy_match(name1: str, name2: str) -> bool:
+    """Check if two constant names should be considered a match."""
+    if name1 == name2:
+        return True
+    return _is_fuzzy_similar(name1, name2)
+
+
+def _build_locations(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantLocation]:
+    """Build location list from constants."""
+    return [
+        ConstantLocation(
+            file_path=file_path, line_number=info.line_number, name=info.name, value=info.value
+        )
+        for file_path, info in constants
+    ]
+
+
+def _group_by_exact_name(locations: list[ConstantLocation]) -> dict[str, ConstantGroup]:
+    """Group locations by exact constant name."""
+    groups: dict[str, ConstantGroup] = {}
+    for loc in locations:
+        if loc.name not in groups:
+            groups[loc.name] = ConstantGroup(
+                canonical_name=loc.name, locations=[], all_names=set(), is_fuzzy_match=False
+            )
+        groups[loc.name].add_location(loc)
+    return groups
+
+
+def _union_matching_pairs(
+    names: list[str], uf: UnionFind, is_match: Callable[[str, str], bool]
+) -> None:
+    """Union all pairs of names that match."""
+    for name1, name2 in combinations(names, 2):
+        if is_match(name1, name2):
+            uf.union(name1, name2)
+
+
+def _build_merged_groups(
+    names: list[str], groups: dict[str, ConstantGroup], uf: UnionFind
+) -> list[ConstantGroup]:
+    """Build merged groups from union-find structure."""
+    merged: dict[str, ConstantGroup] = {}
+    for name in names:
+        root = uf.find(name)
+        if root not in merged:
+            merged[root] = ConstantGroup(
+                canonical_name=root, locations=[], all_names=set(), is_fuzzy_match=False
+            )
+        for loc in groups[name].locations:
+            merged[root].add_location(loc)
+        if name != root:
+            merged[root].is_fuzzy_match = True
+    return list(merged.values())
+
+
+def _get_words(name: str) -> list[str]:
+    """Split constant name into lowercase words."""
+    return [w.lower() for w in name.split("_") if w]
+
+
+def _is_fuzzy_similar(name1: str, name2: str) -> bool:
+    """Check if two names are fuzzy similar (word-set or edit distance)."""
+    words1, words2 = _get_words(name1), _get_words(name2)
+    if not _has_enough_words(words1, words2):
+        return False
+    if _has_antonym_conflict(set(words1), set(words2)):
+        return False
+    return _word_set_match(words1, words2) or _edit_distance_match(name1, name2)
+
+
+def _has_enough_words(words1: list[str], words2: list[str]) -> bool:
+    """Check if both word lists have at least 2 words for fuzzy matching."""
+    return len(words1) >= 2 and len(words2) >= 2
+
+
+def _word_set_match(words1: list[str], words2: list[str]) -> bool:
+    """Check if two word lists contain the same words."""
+    return set(words1) == set(words2)
+
+
+def _has_antonym_conflict(set1: set[str], set2: set[str]) -> bool:
+    """Check if word sets contain conflicting antonyms (e.g., MAX vs MIN)."""
+    return any(_is_antonym_split(pair, set1, set2) for pair in ANTONYM_PAIRS)
+
+
+def _is_antonym_split(pair: frozenset[str], set1: set[str], set2: set[str]) -> bool:
+    """Check if one set has one word of the pair and the other has the opposite."""
+    pair_list = tuple(pair)
+    word_a, word_b = pair_list[0], pair_list[1]
+    return (word_a in set1 and word_b in set2) or (word_b in set1 and word_a in set2)
+
+
+def _edit_distance_match(name1: str, name2: str) -> bool:
+    """Check if names match within edit distance threshold."""
+    return _levenshtein_distance(name1.lower(), name2.lower()) <= MAX_EDIT_DISTANCE
+
+
+def _levenshtein_distance(s1: str, s2: str) -> int:
+    """Calculate Levenshtein distance between two strings."""
+    if len(s1) < len(s2):
+        return _levenshtein_distance(s2, s1)  # pylint: disable=arguments-out-of-order
+    if len(s2) == 0:
+        return len(s1)
+    previous_row = list(range(len(s2) + 1))
+    for i, c1 in enumerate(s1):
+        current_row = [i + 1]
+        for j, c2 in enumerate(s2):
+            insertions = previous_row[j + 1] + 1
+            deletions = current_row[j] + 1
+            substitutions = previous_row[j] + (c1 != c2)
+            current_row.append(min(insertions, deletions, substitutions))
+        previous_row = current_row
+    return previous_row[-1]
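Note: the standalone sketch below is not code from the wheel; it reproduces the two matching rules described in the module docstring above (word-set equality and Levenshtein distance <= 2), with the antonym guard omitted for brevity. Helper names are illustrative only.

# Standalone sketch of the matching rules (illustrative, not package code)
def words(name: str) -> list[str]:
    return [w.lower() for w in name.split("_") if w]

def levenshtein(s1: str, s2: str) -> int:
    # Classic row-by-row dynamic programming edit distance
    if len(s1) < len(s2):
        return levenshtein(s2, s1)
    previous = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        current = [i + 1]
        for j, c2 in enumerate(s2):
            current.append(min(previous[j + 1] + 1, current[j] + 1, previous[j] + (c1 != c2)))
        previous = current
    return previous[-1]

def related(a: str, b: str) -> bool:
    wa, wb = words(a), words(b)
    if len(wa) < 2 or len(wb) < 2:  # single-word names: exact match only
        return a == b
    return set(wa) == set(wb) or levenshtein(a.lower(), b.lower()) <= 2

print(related("API_TIMEOUT", "TIMEOUT_API"))  # True  (word-set match)
print(related("MAX_RETRYS", "MAX_RETRIES"))   # True  (edit distance 2)
print(related("MAX", "TIMEOUT"))              # False (single words: exact only)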
src/linters/dry/constant_violation_builder.py ADDED
@@ -0,0 +1,98 @@
+"""
+Purpose: Build violation messages for duplicate constants
+
+Scope: Violation message formatting for constant duplication detection
+
+Overview: Formats detailed violation messages for duplicate constant detection. Creates messages
+    that include the constant name(s), all file locations with line numbers, and the values
+    assigned at each location. Distinguishes between exact matches (same constant name) and
+    fuzzy matches (similar names like API_TIMEOUT and TIMEOUT_API). Provides actionable guidance
+    to consolidate constants into a shared module.
+
+Dependencies: ConstantGroup from constant module, Violation from core.types
+
+Exports: ConstantViolationBuilder class
+
+Interfaces: ConstantViolationBuilder.build_violations(groups, rule_id) -> list[Violation]
+
+Implementation: Message template formatting with location enumeration and fuzzy match indication
+"""
+
+from src.core.types import Severity, Violation
+
+from .constant import ConstantGroup, ConstantLocation
+
+# Maximum other locations to show in violation message
+MAX_DISPLAYED_LOCATIONS = 3
+
+
+class ConstantViolationBuilder:
+    """Builds violation messages for duplicate constants."""
+
+    def __init__(self, min_occurrences: int = 2) -> None:
+        """Initialize with minimum occurrence threshold."""
+        self.min_occurrences = min_occurrences
+
+    def build_violations(self, groups: list[ConstantGroup], rule_id: str) -> list[Violation]:
+        """Build violations from constant groups."""
+        violations = []
+        for group in groups:
+            if group.file_count >= self.min_occurrences:
+                violations.extend(self._violations_for_group(group, rule_id))
+        return violations
+
+    def _violations_for_group(self, group: ConstantGroup, rule_id: str) -> list[Violation]:
+        """Create violations for all locations in a group."""
+        return [
+            Violation(
+                rule_id=rule_id,
+                file_path=str(loc.file_path),
+                line=loc.line_number,
+                column=1,
+                message=self._format_message(group, loc),
+                severity=Severity.ERROR,
+            )
+            for loc in group.locations
+        ]
+
+    def _format_message(self, group: ConstantGroup, current: ConstantLocation) -> str:
+        """Format the violation message based on match type."""
+        others = _get_other_locations(group, current)
+        locations_text = _format_locations_text(others)
+        if group.is_fuzzy_match:
+            names_str = " ≈ ".join(f"'{n}'" for n in sorted(group.all_names))
+            return (
+                f"Similar constants found: {names_str} in {group.file_count} files. "
+                f"{locations_text} "
+                f"These appear to represent the same concept - consider standardizing the name."
+            )
+        return (
+            f"Duplicate constant '{group.canonical_name}' defined in {group.file_count} files. "
+            f"{locations_text} "
+            f"Consider consolidating to a shared constants module."
+        )
+
+
+def _get_other_locations(group: ConstantGroup, current: ConstantLocation) -> list[ConstantLocation]:
+    """Get locations excluding current (module-level helper)."""
+    return [
+        loc
+        for loc in group.locations
+        if loc.file_path != current.file_path or loc.line_number != current.line_number
+    ]
+
+
+def _format_locations_text(others: list[ConstantLocation]) -> str:
+    """Format other locations as text (module-level helper)."""
+    if not others:
+        return ""
+    parts = [_format_single_location(loc) for loc in others[:MAX_DISPLAYED_LOCATIONS]]
+    result = "Also found in: " + ", ".join(parts)
+    extra = len(others) - MAX_DISPLAYED_LOCATIONS
+    return result + (f" and {extra} more." if extra > 0 else ".")
+
+
+def _format_single_location(loc: ConstantLocation) -> str:
+    """Format a single location for display (module-level helper)."""
+    value_str = f" = {loc.value}" if loc.value else ""
+    return f"{loc.file_path.name}:{loc.line_number} ({loc.name}{value_str})"
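Note: a minimal sketch (not package code) of the location-list truncation behavior above, using plain (file, line) tuples in place of ConstantLocation; MAX_DISPLAYED mirrors MAX_DISPLAYED_LOCATIONS = 3.

# Illustrative sketch of the "Also found in" truncation rule
MAX_DISPLAYED = 3

def locations_text(others: list[tuple[str, int]]) -> str:
    if not others:
        return ""
    parts = [f"{name}:{line}" for name, line in others[:MAX_DISPLAYED]]
    extra = len(others) - MAX_DISPLAYED
    return "Also found in: " + ", ".join(parts) + (f" and {extra} more." if extra > 0 else ".")

print(locations_text([("a.py", 3), ("b.py", 7)]))
# Also found in: a.py:3, b.py:7.
print(locations_text([("a.py", 3), ("b.py", 7), ("c.py", 2), ("d.py", 9)]))
# Also found in: a.py:3, b.py:7, c.py:2 and 1 more.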
src/linters/dry/duplicate_storage.py CHANGED
@@ -1,21 +1,20 @@
 """
-Purpose: Storage management for duplicate code blocks
+Purpose: Storage management for duplicate code blocks in SQLite
 
-Scope: Manages storage of code blocks in SQLite
+Scope: Manages storage of code blocks in SQLite for duplicate detection
 
-Overview: Provides
-
-
-    concerns from linting logic to maintain SRP compliance.
+Overview: Provides storage interface for code blocks using SQLite (in-memory or tempfile mode).
+    Handles block insertion and duplicate hash queries. Delegates all storage operations to
+    DRYCache SQLite layer. Separates storage concerns from linting logic to maintain SRP compliance.
 
 Dependencies: DRYCache, CodeBlock, Path
 
 Exports: DuplicateStorage class
 
-Interfaces: DuplicateStorage.add_blocks(file_path, blocks),
+Interfaces: DuplicateStorage.add_blocks(file_path, blocks), duplicate_hashes property,
     get_blocks_for_hash(hash_value)
 
-Implementation: Delegates to
+Implementation: Delegates to SQLite cache for all storage operations
 """
 
 from pathlib import Path
@@ -24,82 +23,37 @@ from .cache import CodeBlock, DRYCache
 
 
 class DuplicateStorage:
-    """Manages storage of code blocks in
+    """Manages storage of code blocks in SQLite."""
 
-    def __init__(self, cache: DRYCache
-        """Initialize storage with
+    def __init__(self, cache: DRYCache) -> None:
+        """Initialize storage with SQLite cache.
 
         Args:
-            cache: SQLite cache instance (
+            cache: SQLite cache instance (in-memory or tempfile mode)
         """
         self._cache = cache
-        self._memory_store: dict[int, list[CodeBlock]] = {}
 
     def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
-        """Add code blocks to storage
+        """Add code blocks to SQLite storage.
 
         Args:
            file_path: Path to source file
           blocks: List of code blocks to store
        """
-
-
+        if blocks:
+            self._cache.add_blocks(file_path, blocks)
 
-
-
-
-
-    def add_blocks_to_memory(self, file_path: Path, blocks: list[CodeBlock]) -> None:
-        """Add code blocks to in-memory storage only (for cache hits).
-
-        Args:
-            file_path: Path to source file (used for cache persistence check)
-            blocks: List of code blocks to store
-        """
-        # Add to memory for duplicate detection this run
-        self._add_to_memory(blocks)
-
-        # Guard clauses - early returns for skip conditions
-        if not self._cache:
-            return
-
-        if not blocks:
-            return
-
-        # Update cache with new blocks if needed (for fresh analysis)
-        self._update_cache_if_fresh(file_path, blocks)
-
-    def _update_cache_if_fresh(self, file_path: Path, blocks: list[CodeBlock]) -> None:
-        """Update cache if file analysis is fresh (not from cache).
-
-        Args:
-            file_path: Path to source file
-            blocks: List of code blocks to store
-        """
-        if not self._cache:
-            return
-
-        try:
-            mtime = file_path.stat().st_mtime
-        except OSError:
-            # File doesn't exist, skip cache
-            return
-
-        # File was analyzed (not cached), so persist if not fresh
-        if not self._cache.is_fresh(file_path, mtime):
-            self._add_to_cache(file_path, blocks)
-
-    def get_duplicate_hashes(self) -> list[int]:
-        """Get all hash values with 2+ occurrences from memory.
+    @property
+    def duplicate_hashes(self) -> list[int]:
+        """Hash values with 2+ occurrences from SQLite.
 
         Returns:
             List of hash values that appear in multiple blocks
         """
-
-        return [h for h, blocks in self._memory_store.items() if len(blocks) >= 2]
+        return self._cache.duplicate_hashes
 
     def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
-        """Get all blocks with given hash value from
+        """Get all blocks with given hash value from SQLite.
 
         Args:
             hash_value: Hash to search for
@@ -107,20 +61,4 @@ class DuplicateStorage:
         Returns:
             List of code blocks with this hash
         """
-
-        return self._memory_store.get(hash_value, [])
-
-    def _add_to_cache(self, file_path: Path, blocks: list[CodeBlock]) -> None:
-        """Add blocks to SQLite cache."""
-        if not self._cache or not blocks:
-            return
-
-        mtime = file_path.stat().st_mtime
-        self._cache.save(file_path, mtime, blocks)
-
-    def _add_to_memory(self, blocks: list[CodeBlock]) -> None:
-        """Add blocks to in-memory store."""
-        for block in blocks:
-            if block.hash_value not in self._memory_store:
-                self._memory_store[block.hash_value] = []
-            self._memory_store[block.hash_value].append(block)
+        return self._cache.find_duplicates_by_hash(hash_value)
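Note: the DRYCache schema is not part of this diff. The standalone snippet below only illustrates the kind of GROUP BY / HAVING query that a "hashes with 2+ occurrences" property typically maps to; the table and column names are hypothetical, not the package's actual schema.

# Hypothetical illustration of a duplicate-hash query over SQLite
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE blocks (hash_value INTEGER, file_path TEXT, start_line INTEGER)")
conn.executemany(
    "INSERT INTO blocks VALUES (?, ?, ?)",
    [(111, "a.py", 10), (111, "b.py", 42), (222, "a.py", 80)],
)
dupes = [h for (h,) in conn.execute(
    "SELECT hash_value FROM blocks GROUP BY hash_value HAVING COUNT(*) >= 2"
)]
print(dupes)  # [111]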
src/linters/dry/file_analyzer.py CHANGED
@@ -1,45 +1,34 @@
 """
 Purpose: File analysis orchestration for duplicate detection
 
-Scope: Coordinates language-specific analyzers
+Scope: Coordinates language-specific analyzers
 
-Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript)
-
-
-    SRP compliance.
+Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript).
+    Analyzes files fresh every run - no cache loading. Separates file analysis orchestration from
+    main linter rule logic to maintain SRP compliance.
 
-Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer,
+Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYConfig, CodeBlock
 
 Exports: FileAnalyzer class
 
-Interfaces: FileAnalyzer.
+Interfaces: FileAnalyzer.analyze(file_path, content, language, config)
 
-Implementation: Delegates to language-specific analyzers,
+Implementation: Delegates to language-specific analyzers, always performs fresh analysis
 """
 
-from dataclasses import dataclass
 from pathlib import Path
 
+from src.core.constants import Language
+
 from .block_filter import BlockFilterRegistry, create_default_registry
-from .cache import CodeBlock
+from .cache import CodeBlock
 from .config import DRYConfig
 from .python_analyzer import PythonDuplicateAnalyzer
 from .typescript_analyzer import TypeScriptDuplicateAnalyzer
 
 
-@dataclass
-class FileAnalysisContext:
-    """Context for file analysis."""
-
-    file_path: Path
-    content: str
-    language: str
-    config: DRYConfig
-    cache: DRYCache | None
-
-
 class FileAnalyzer:
-    """Orchestrates file analysis
+    """Orchestrates file analysis for duplicate detection."""
 
     def __init__(self, config: DRYConfig | None = None) -> None:
         """Initialize with language-specific analyzers.
@@ -77,51 +66,27 @@ class FileAnalyzer:
 
         return registry
 
-    def
+    def analyze(
         self,
         file_path: Path,
         content: str,
         language: str,
         config: DRYConfig,
-        cache: DRYCache | None = None,
     ) -> list[CodeBlock]:
-        """Analyze file
+        """Analyze file for duplicate code blocks.
 
         Args:
             file_path: Path to file
             content: File content
            language: File language
            config: DRY configuration
-            cache: Optional cache instance
 
         Returns:
             List of CodeBlock instances
         """
-        # Check if file is fresh in cache
-        if cache:
-            mtime = file_path.stat().st_mtime
-            if cache.is_fresh(file_path, mtime):
-                return cache.load(file_path)
-
         # Analyze file based on language
-
-
-    def _analyze_file(
-        self, file_path: Path, content: str, language: str, config: DRYConfig
-    ) -> list[CodeBlock]:
-        """Analyze file based on language.
-
-        Args:
-            file_path: Path to file
-            content: File content
-            language: File language
-            config: DRY configuration
-
-        Returns:
-            List of CodeBlock instances
-        """
-        if language == "python":
+        if language == Language.PYTHON:
            return self._python_analyzer.analyze(file_path, content, config)
-        if language in (
+        if language in (Language.TYPESCRIPT, Language.JAVASCRIPT):
            return self._typescript_analyzer.analyze(file_path, content, config)
        return []
src/linters/dry/inline_ignore.py CHANGED
@@ -50,14 +50,11 @@ class InlineIgnoreParser:
         Returns:
             List of (start, end) tuples for ignore ranges
         """
-
-
-
-            ignore_range
-
-                ranges.append(ignore_range)
-
-        return ranges
+        return [
+            ignore_range
+            for i, line in enumerate(lines, start=1)
+            if (ignore_range := self._parse_ignore_directive(line, i, len(lines)))
+        ]
 
     def _parse_ignore_directive(
         self, line: str, line_num: int, total_lines: int
@@ -115,10 +112,7 @@ class InlineIgnoreParser:
         Returns:
             True if ranges overlap
         """
-        for ign_start, ign_end in ranges
-            if line <= ign_end and end_line >= ign_start:
-                return True
-        return False
+        return any(line <= ign_end and end_line >= ign_start for ign_start, ign_end in ranges)
 
     def _check_single_line(self, line: int, ranges: list[tuple[int, int]]) -> bool:
         """Check if single line is in any ignore range.
@@ -130,10 +124,7 @@ class InlineIgnoreParser:
         Returns:
             True if line is in any range
         """
-        for start, end in ranges
-            if start <= line <= end:
-                return True
-        return False
+        return any(start <= line <= end for start, end in ranges)
 
     def clear(self) -> None:
         """Clear all stored ignore ranges."""