thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +44 -27
  23. src/core/base.py +95 -5
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +36 -6
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +125 -22
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +142 -94
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +68 -21
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +20 -82
  73. src/linters/dry/file_analyzer.py +15 -50
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +182 -54
  76. src/linters/dry/python_analyzer.py +108 -336
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/storage_initializer.py +9 -18
  80. src/linters/dry/token_hasher.py +129 -71
  81. src/linters/dry/typescript_analyzer.py +68 -380
  82. src/linters/dry/typescript_constant_extractor.py +138 -0
  83. src/linters/dry/typescript_statement_detector.py +255 -0
  84. src/linters/dry/typescript_value_extractor.py +70 -0
  85. src/linters/dry/violation_builder.py +4 -0
  86. src/linters/dry/violation_filter.py +9 -5
  87. src/linters/dry/violation_generator.py +71 -14
  88. src/linters/file_header/__init__.py +24 -0
  89. src/linters/file_header/atemporal_detector.py +105 -0
  90. src/linters/file_header/base_parser.py +93 -0
  91. src/linters/file_header/bash_parser.py +66 -0
  92. src/linters/file_header/config.py +140 -0
  93. src/linters/file_header/css_parser.py +70 -0
  94. src/linters/file_header/field_validator.py +72 -0
  95. src/linters/file_header/linter.py +309 -0
  96. src/linters/file_header/markdown_parser.py +130 -0
  97. src/linters/file_header/python_parser.py +42 -0
  98. src/linters/file_header/typescript_parser.py +73 -0
  99. src/linters/file_header/violation_builder.py +79 -0
  100. src/linters/file_placement/config_loader.py +3 -1
  101. src/linters/file_placement/directory_matcher.py +4 -0
  102. src/linters/file_placement/linter.py +74 -31
  103. src/linters/file_placement/pattern_matcher.py +41 -6
  104. src/linters/file_placement/pattern_validator.py +31 -12
  105. src/linters/file_placement/rule_checker.py +12 -7
  106. src/linters/lazy_ignores/__init__.py +43 -0
  107. src/linters/lazy_ignores/config.py +74 -0
  108. src/linters/lazy_ignores/directive_utils.py +164 -0
  109. src/linters/lazy_ignores/header_parser.py +177 -0
  110. src/linters/lazy_ignores/linter.py +158 -0
  111. src/linters/lazy_ignores/matcher.py +168 -0
  112. src/linters/lazy_ignores/python_analyzer.py +209 -0
  113. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  114. src/linters/lazy_ignores/skip_detector.py +298 -0
  115. src/linters/lazy_ignores/types.py +71 -0
  116. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  117. src/linters/lazy_ignores/violation_builder.py +135 -0
  118. src/linters/lbyl/__init__.py +31 -0
  119. src/linters/lbyl/config.py +63 -0
  120. src/linters/lbyl/linter.py +67 -0
  121. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  122. src/linters/lbyl/pattern_detectors/base.py +63 -0
  123. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  124. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  125. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  126. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  127. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  128. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  129. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  130. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  131. src/linters/lbyl/python_analyzer.py +215 -0
  132. src/linters/lbyl/violation_builder.py +354 -0
  133. src/linters/magic_numbers/__init__.py +48 -0
  134. src/linters/magic_numbers/config.py +82 -0
  135. src/linters/magic_numbers/context_analyzer.py +249 -0
  136. src/linters/magic_numbers/linter.py +462 -0
  137. src/linters/magic_numbers/python_analyzer.py +64 -0
  138. src/linters/magic_numbers/typescript_analyzer.py +215 -0
  139. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  140. src/linters/magic_numbers/violation_builder.py +98 -0
  141. src/linters/method_property/__init__.py +49 -0
  142. src/linters/method_property/config.py +138 -0
  143. src/linters/method_property/linter.py +414 -0
  144. src/linters/method_property/python_analyzer.py +473 -0
  145. src/linters/method_property/violation_builder.py +119 -0
  146. src/linters/nesting/__init__.py +6 -2
  147. src/linters/nesting/config.py +6 -3
  148. src/linters/nesting/linter.py +31 -34
  149. src/linters/nesting/python_analyzer.py +4 -0
  150. src/linters/nesting/typescript_analyzer.py +6 -11
  151. src/linters/nesting/violation_builder.py +1 -0
  152. src/linters/performance/__init__.py +91 -0
  153. src/linters/performance/config.py +43 -0
  154. src/linters/performance/constants.py +49 -0
  155. src/linters/performance/linter.py +149 -0
  156. src/linters/performance/python_analyzer.py +365 -0
  157. src/linters/performance/regex_analyzer.py +312 -0
  158. src/linters/performance/regex_linter.py +139 -0
  159. src/linters/performance/typescript_analyzer.py +236 -0
  160. src/linters/performance/violation_builder.py +160 -0
  161. src/linters/print_statements/__init__.py +53 -0
  162. src/linters/print_statements/config.py +78 -0
  163. src/linters/print_statements/linter.py +413 -0
  164. src/linters/print_statements/python_analyzer.py +153 -0
  165. src/linters/print_statements/typescript_analyzer.py +125 -0
  166. src/linters/print_statements/violation_builder.py +96 -0
  167. src/linters/srp/__init__.py +3 -3
  168. src/linters/srp/class_analyzer.py +11 -7
  169. src/linters/srp/config.py +12 -6
  170. src/linters/srp/heuristics.py +56 -22
  171. src/linters/srp/linter.py +47 -39
  172. src/linters/srp/python_analyzer.py +55 -20
  173. src/linters/srp/typescript_metrics_calculator.py +110 -50
  174. src/linters/stateless_class/__init__.py +25 -0
  175. src/linters/stateless_class/config.py +58 -0
  176. src/linters/stateless_class/linter.py +349 -0
  177. src/linters/stateless_class/python_analyzer.py +290 -0
  178. src/linters/stringly_typed/__init__.py +36 -0
  179. src/linters/stringly_typed/config.py +189 -0
  180. src/linters/stringly_typed/context_filter.py +451 -0
  181. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  182. src/linters/stringly_typed/ignore_checker.py +100 -0
  183. src/linters/stringly_typed/ignore_utils.py +51 -0
  184. src/linters/stringly_typed/linter.py +376 -0
  185. src/linters/stringly_typed/python/__init__.py +33 -0
  186. src/linters/stringly_typed/python/analyzer.py +348 -0
  187. src/linters/stringly_typed/python/call_tracker.py +175 -0
  188. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  189. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  190. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  191. src/linters/stringly_typed/python/constants.py +21 -0
  192. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  193. src/linters/stringly_typed/python/validation_detector.py +189 -0
  194. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  195. src/linters/stringly_typed/storage.py +620 -0
  196. src/linters/stringly_typed/storage_initializer.py +45 -0
  197. src/linters/stringly_typed/typescript/__init__.py +28 -0
  198. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  199. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  200. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  201. src/linters/stringly_typed/violation_generator.py +419 -0
  202. src/orchestrator/core.py +264 -16
  203. src/orchestrator/language_detector.py +5 -3
  204. src/templates/thailint_config_template.yaml +354 -0
  205. src/utils/project_root.py +138 -16
  206. thailint-0.15.3.dist-info/METADATA +187 -0
  207. thailint-0.15.3.dist-info/RECORD +226 -0
  208. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
  209. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  210. src/cli.py +0 -1055
  211. thailint-0.2.0.dist-info/METADATA +0 -980
  212. thailint-0.2.0.dist-info/RECORD +0 -75
  213. thailint-0.2.0.dist-info/entry_points.txt +0 -4
  214. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,223 @@
1
+ """
2
+ Purpose: Fuzzy matching for constant names across files
3
+
4
+ Scope: Constant name matching with word-set and edit distance algorithms
5
+
6
+ Overview: Implements fuzzy matching strategies to identify related constants across files. Uses
7
+ two matching strategies: word-set matching (same words in different order, e.g., API_TIMEOUT
8
+ and TIMEOUT_API) and edit distance matching (typos within Levenshtein distance <= 2, e.g.,
9
+ MAX_RETRYS and MAX_RETRIES). Single-word constants (e.g., MAX, TIMEOUT) only use exact
10
+ matching to avoid false positives. Groups related constants into ConstantGroup instances
11
+ for violation reporting.
12
+
13
+ Dependencies: ConstantInfo, ConstantLocation, ConstantGroup from constant module
14
+
15
+ Exports: find_constant_groups function
16
+
17
+ Interfaces: find_constant_groups(constants) -> list[ConstantGroup]
18
+
19
+ Implementation: Union-Find algorithm for grouping, word-set hashing, Levenshtein distance calculation
20
+
21
+ Suppressions:
22
+ - arguments-out-of-order: Named arguments used for clarity in ConstantLocation
23
+ """
24
+
25
+ from collections.abc import Callable
26
+ from itertools import combinations
27
+ from pathlib import Path
28
+
29
+ from .constant import ConstantGroup, ConstantInfo, ConstantLocation
30
+
31
+ # Maximum edit distance for fuzzy matching
32
+ MAX_EDIT_DISTANCE = 2
33
+
34
+ # Antonym pairs that should not be fuzzy-matched
35
+ # If one name contains a word from the left side and the other contains the right side,
36
+ # they represent different concepts and should not be grouped together
37
+ ANTONYM_PAIRS = frozenset(
38
+ (
39
+ frozenset(("max", "min")),
40
+ frozenset(("start", "end")),
41
+ frozenset(("first", "last")),
42
+ frozenset(("before", "after")),
43
+ frozenset(("open", "close")),
44
+ frozenset(("read", "write")),
45
+ frozenset(("get", "set")),
46
+ frozenset(("push", "pop")),
47
+ frozenset(("add", "remove")),
48
+ frozenset(("create", "delete")),
49
+ frozenset(("enable", "disable")),
50
+ frozenset(("show", "hide")),
51
+ frozenset(("up", "down")),
52
+ frozenset(("left", "right")),
53
+ frozenset(("top", "bottom")),
54
+ frozenset(("prev", "next")),
55
+ frozenset(("success", "failure")),
56
+ frozenset(("true", "false")),
57
+ frozenset(("on", "off")),
58
+ frozenset(("in", "out")),
59
+ )
60
+ )
61
+
62
+ # Minimum length for constant names (exclude single-letter type params like P, T, K, V)
63
+ MIN_CONSTANT_NAME_LENGTH = 2
64
+
65
+
66
class UnionFind:
    """Union-Find (disjoint set) structure for grouping constant names."""

    def __init__(self, items: list[str]) -> None:
        """Initialize each item as its own singleton set.

        Args:
            items: Names to track; each starts as its own root.
        """
        self._parent = {item: item for item in items}

    def find(self, x: str) -> str:
        """Return the root representative of x's set.

        Iterative with full path compression: the recursive original could
        exceed the interpreter recursion limit on a long parent chain
        (unions are not balanced by rank), so we walk up to the root first
        and then repoint every node on the path directly at it.
        """
        root = x
        while self._parent[root] != root:
            root = self._parent[root]
        while self._parent[x] != root:
            self._parent[x], x = root, self._parent[x]
        return root

    def union(self, x: str, y: str) -> None:
        """Merge the sets containing x and y (no-op if already together)."""
        px, py = self.find(x), self.find(y)
        if px != py:
            self._parent[px] = py
84
+
85
+
86
def find_constant_groups(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantGroup]:
    """Find groups of related constants.

    Args:
        constants: List of (file_path, ConstantInfo) tuples.

    Returns:
        List of ConstantGroup instances representing related constants.
    """
    if not constants:
        return []
    # Exact-name grouping first, then merge fuzzy-similar names together.
    by_name = _group_by_exact_name(_build_locations(constants))
    return _merge_fuzzy_groups(by_name)
100
+
101
+
102
def _merge_fuzzy_groups(groups: dict[str, ConstantGroup]) -> list[ConstantGroup]:
    """Merge exact-name groups whose names fuzzy-match each other."""
    all_names = list(groups)
    finder = UnionFind(all_names)
    _union_matching_pairs(all_names, finder, _is_fuzzy_match)
    return _build_merged_groups(all_names, groups, finder)
108
+
109
+
110
def _is_fuzzy_match(name1: str, name2: str) -> bool:
    """Check whether two constant names should be treated as the same."""
    # Identical names match trivially; otherwise fall back to fuzzy rules.
    return name1 == name2 or _is_fuzzy_similar(name1, name2)
115
+
116
+
117
def _build_locations(constants: list[tuple[Path, ConstantInfo]]) -> list[ConstantLocation]:
    """Convert (path, info) tuples into ConstantLocation records."""
    locations: list[ConstantLocation] = []
    for file_path, info in constants:
        locations.append(
            ConstantLocation(
                file_path=file_path,
                line_number=info.line_number,
                name=info.name,
                value=info.value,
            )
        )
    return locations
125
+
126
+
127
def _group_by_exact_name(locations: list[ConstantLocation]) -> dict[str, ConstantGroup]:
    """Bucket locations into one ConstantGroup per exact constant name."""
    groups: dict[str, ConstantGroup] = {}
    for loc in locations:
        group = groups.get(loc.name)
        if group is None:
            group = ConstantGroup(
                canonical_name=loc.name, locations=[], all_names=set(), is_fuzzy_match=False
            )
            groups[loc.name] = group
        group.add_location(loc)
    return groups
137
+
138
+
139
def _union_matching_pairs(
    names: list[str], uf: UnionFind, is_match: Callable[[str, str], bool]
) -> None:
    """Union every pair of names for which is_match returns True."""
    # Same pair ordering as itertools.combinations(names, 2).
    for i, first in enumerate(names):
        for second in names[i + 1 :]:
            if is_match(first, second):
                uf.union(first, second)
146
+
147
+
148
def _build_merged_groups(
    names: list[str], groups: dict[str, ConstantGroup], uf: UnionFind
) -> list[ConstantGroup]:
    """Collapse union-find components into merged ConstantGroup instances."""
    merged: dict[str, ConstantGroup] = {}
    for name in names:
        root = uf.find(name)
        target = merged.get(root)
        if target is None:
            target = ConstantGroup(
                canonical_name=root, locations=[], all_names=set(), is_fuzzy_match=False
            )
            merged[root] = target
        for loc in groups[name].locations:
            target.add_location(loc)
        # Any member whose name differs from the root means two distinct
        # names were merged, i.e. this is a fuzzy (not exact) group.
        if name != root:
            target.is_fuzzy_match = True
    return list(merged.values())
164
+
165
+
166
+ def _get_words(name: str) -> list[str]:
167
+ """Split constant name into lowercase words."""
168
+ return [w.lower() for w in name.split("_") if w]
169
+
170
+
171
def _is_fuzzy_similar(name1: str, name2: str) -> bool:
    """Check whether two names match by word-set or edit distance."""
    words1 = _get_words(name1)
    words2 = _get_words(name2)
    # Single-word names never fuzzy-match (exact match is the caller's job).
    if not _has_enough_words(words1, words2):
        return False
    # Opposing concepts (MAX vs MIN, START vs END, ...) must not be grouped.
    if _has_antonym_conflict(set(words1), set(words2)):
        return False
    if _word_set_match(words1, words2):
        return True
    return _edit_distance_match(name1, name2)
179
+
180
+
181
+ def _has_enough_words(words1: list[str], words2: list[str]) -> bool:
182
+ """Check if both word lists have at least 2 words for fuzzy matching."""
183
+ return len(words1) >= 2 and len(words2) >= 2
184
+
185
+
186
+ def _word_set_match(words1: list[str], words2: list[str]) -> bool:
187
+ """Check if two word lists contain the same words."""
188
+ return set(words1) == set(words2)
189
+
190
+
191
def _has_antonym_conflict(set1: set[str], set2: set[str]) -> bool:
    """Check if word sets contain conflicting antonyms (e.g., MAX vs MIN)."""
    for pair in ANTONYM_PAIRS:
        if _is_antonym_split(pair, set1, set2):
            return True
    return False
194
+
195
+
196
+ def _is_antonym_split(pair: frozenset[str], set1: set[str], set2: set[str]) -> bool:
197
+ """Check if one set has one word of the pair and the other has the opposite."""
198
+ pair_list = tuple(pair)
199
+ word_a, word_b = pair_list[0], pair_list[1]
200
+ return (word_a in set1 and word_b in set2) or (word_b in set1 and word_a in set2)
201
+
202
+
203
def _edit_distance_match(name1: str, name2: str) -> bool:
    """True when the lowercased names are within MAX_EDIT_DISTANCE edits."""
    distance = _levenshtein_distance(name1.lower(), name2.lower())
    return distance <= MAX_EDIT_DISTANCE
206
+
207
+
208
+ def _levenshtein_distance(s1: str, s2: str) -> int:
209
+ """Calculate Levenshtein distance between two strings."""
210
+ if len(s1) < len(s2):
211
+ return _levenshtein_distance(s2, s1) # pylint: disable=arguments-out-of-order
212
+ if len(s2) == 0:
213
+ return len(s1)
214
+ previous_row = list(range(len(s2) + 1))
215
+ for i, c1 in enumerate(s1):
216
+ current_row = [i + 1]
217
+ for j, c2 in enumerate(s2):
218
+ insertions = previous_row[j + 1] + 1
219
+ deletions = current_row[j] + 1
220
+ substitutions = previous_row[j] + (c1 != c2)
221
+ current_row.append(min(insertions, deletions, substitutions))
222
+ previous_row = current_row
223
+ return previous_row[-1]
@@ -0,0 +1,98 @@
1
+ """
2
+ Purpose: Build violation messages for duplicate constants
3
+
4
+ Scope: Violation message formatting for constant duplication detection
5
+
6
+ Overview: Formats detailed violation messages for duplicate constant detection. Creates messages
7
+ that include the constant name(s), all file locations with line numbers, and the values
8
+ assigned at each location. Distinguishes between exact matches (same constant name) and
9
+ fuzzy matches (similar names like API_TIMEOUT and TIMEOUT_API). Provides actionable guidance
10
+ to consolidate constants into a shared module.
11
+
12
+ Dependencies: ConstantGroup from constant module, Violation from core.types
13
+
14
+ Exports: ConstantViolationBuilder class
15
+
16
+ Interfaces: ConstantViolationBuilder.build_violations(groups, rule_id) -> list[Violation]
17
+
18
+ Implementation: Message template formatting with location enumeration and fuzzy match indication
19
+ """
20
+
21
+ from src.core.types import Severity, Violation
22
+
23
+ from .constant import ConstantGroup, ConstantLocation
24
+
25
+ # Maximum other locations to show in violation message
26
+ MAX_DISPLAYED_LOCATIONS = 3
27
+
28
+
29
class ConstantViolationBuilder:
    """Builds violation messages for duplicate constants."""

    def __init__(self, min_occurrences: int = 2) -> None:
        """Initialize with minimum occurrence threshold.

        Args:
            min_occurrences: Minimum number of files a constant must
                appear in before its group is reported.
        """
        self.min_occurrences = min_occurrences

    def build_violations(self, groups: list[ConstantGroup], rule_id: str) -> list[Violation]:
        """Build violations for every group that meets the file threshold."""
        violations: list[Violation] = []
        for group in groups:
            if group.file_count < self.min_occurrences:
                continue
            violations.extend(self._violations_for_group(group, rule_id))
        return violations

    def _violations_for_group(self, group: ConstantGroup, rule_id: str) -> list[Violation]:
        """Create one violation per location in the group."""
        result = []
        for loc in group.locations:
            result.append(
                Violation(
                    rule_id=rule_id,
                    file_path=str(loc.file_path),
                    line=loc.line_number,
                    column=1,
                    message=self._format_message(group, loc),
                    severity=Severity.ERROR,
                )
            )
        return result

    def _format_message(self, group: ConstantGroup, current: ConstantLocation) -> str:
        """Format the violation message based on match type."""
        locations_text = _format_locations_text(_get_other_locations(group, current))
        if not group.is_fuzzy_match:
            return (
                f"Duplicate constant '{group.canonical_name}' defined in {group.file_count} files. "
                f"{locations_text} "
                f"Consider consolidating to a shared constants module."
            )
        names_str = " ≈ ".join(f"'{n}'" for n in sorted(group.all_names))
        return (
            f"Similar constants found: {names_str} in {group.file_count} files. "
            f"{locations_text} "
            f"These appear to represent the same concept - consider standardizing the name."
        )
+
75
+
76
def _get_other_locations(group: ConstantGroup, current: ConstantLocation) -> list[ConstantLocation]:
    """Return every location in the group except the current one."""
    others = []
    for loc in group.locations:
        # A location is "current" only if both file and line match.
        same_spot = loc.file_path == current.file_path and loc.line_number == current.line_number
        if not same_spot:
            others.append(loc)
    return others
83
+
84
+
85
def _format_locations_text(others: list[ConstantLocation]) -> str:
    """Format other locations as text, capping at MAX_DISPLAYED_LOCATIONS."""
    if not others:
        return ""
    shown = ", ".join(_format_single_location(loc) for loc in others[:MAX_DISPLAYED_LOCATIONS])
    hidden = len(others) - MAX_DISPLAYED_LOCATIONS
    suffix = f" and {hidden} more." if hidden > 0 else "."
    return f"Also found in: {shown}{suffix}"
93
+
94
+
95
def _format_single_location(loc: ConstantLocation) -> str:
    """Render one location as 'file.py:12 (NAME = value)'."""
    # Omit the value part for falsy values (e.g. empty string).
    if loc.value:
        return f"{loc.file_path.name}:{loc.line_number} ({loc.name} = {loc.value})"
    return f"{loc.file_path.name}:{loc.line_number} ({loc.name})"
@@ -1,21 +1,20 @@
1
1
  """
2
- Purpose: Storage management for duplicate code blocks with cache and memory fallback
2
+ Purpose: Storage management for duplicate code blocks in SQLite
3
3
 
4
- Scope: Manages storage of code blocks in SQLite cache or in-memory dict
4
+ Scope: Manages storage of code blocks in SQLite for duplicate detection
5
5
 
6
- Overview: Provides unified storage interface for code blocks supporting both SQLite-backed caching
7
- and in-memory fallback when cache disabled. Handles block insertion, retrieval, and duplicate
8
- hash queries. Encapsulates Decision 6 (in-memory fallback) implementation. Separates storage
9
- concerns from linting logic to maintain SRP compliance.
6
+ Overview: Provides storage interface for code blocks using SQLite (in-memory or tempfile mode).
7
+ Handles block insertion and duplicate hash queries. Delegates all storage operations to
8
+ DRYCache SQLite layer. Separates storage concerns from linting logic to maintain SRP compliance.
10
9
 
11
10
  Dependencies: DRYCache, CodeBlock, Path
12
11
 
13
12
  Exports: DuplicateStorage class
14
13
 
15
- Interfaces: DuplicateStorage.add_blocks(file_path, blocks), get_duplicate_hashes(),
14
+ Interfaces: DuplicateStorage.add_blocks(file_path, blocks), duplicate_hashes property,
16
15
  get_blocks_for_hash(hash_value)
17
16
 
18
- Implementation: Delegates to either SQLite cache or in-memory dict based on cache_enabled setting
17
+ Implementation: Delegates to SQLite cache for all storage operations
19
18
  """
20
19
 
21
20
  from pathlib import Path
@@ -24,82 +23,37 @@ from .cache import CodeBlock, DRYCache
24
23
 
25
24
 
26
25
  class DuplicateStorage:
27
- """Manages storage of code blocks in cache or memory."""
26
+ """Manages storage of code blocks in SQLite."""
28
27
 
29
- def __init__(self, cache: DRYCache | None) -> None:
30
- """Initialize storage with optional cache.
28
+ def __init__(self, cache: DRYCache) -> None:
29
+ """Initialize storage with SQLite cache.
31
30
 
32
31
  Args:
33
- cache: SQLite cache instance (None for in-memory mode)
32
+ cache: SQLite cache instance (in-memory or tempfile mode)
34
33
  """
35
34
  self._cache = cache
36
- self._memory_store: dict[int, list[CodeBlock]] = {}
37
35
 
38
36
  def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
39
- """Add code blocks to storage and cache.
37
+ """Add code blocks to SQLite storage.
40
38
 
41
39
  Args:
42
40
  file_path: Path to source file
43
41
  blocks: List of code blocks to store
44
42
  """
45
- # Always add to memory for duplicate detection
46
- self._add_to_memory(blocks)
43
+ if blocks:
44
+ self._cache.add_blocks(file_path, blocks)
47
45
 
48
- # Also persist to cache if available
49
- if self._cache:
50
- self._add_to_cache(file_path, blocks)
51
-
52
- def add_blocks_to_memory(self, file_path: Path, blocks: list[CodeBlock]) -> None:
53
- """Add code blocks to in-memory storage only (for cache hits).
54
-
55
- Args:
56
- file_path: Path to source file (used for cache persistence check)
57
- blocks: List of code blocks to store
58
- """
59
- # Add to memory for duplicate detection this run
60
- self._add_to_memory(blocks)
61
-
62
- # Guard clauses - early returns for skip conditions
63
- if not self._cache:
64
- return
65
-
66
- if not blocks:
67
- return
68
-
69
- # Update cache with new blocks if needed (for fresh analysis)
70
- self._update_cache_if_fresh(file_path, blocks)
71
-
72
- def _update_cache_if_fresh(self, file_path: Path, blocks: list[CodeBlock]) -> None:
73
- """Update cache if file analysis is fresh (not from cache).
74
-
75
- Args:
76
- file_path: Path to source file
77
- blocks: List of code blocks to store
78
- """
79
- if not self._cache:
80
- return
81
-
82
- try:
83
- mtime = file_path.stat().st_mtime
84
- except OSError:
85
- # File doesn't exist, skip cache
86
- return
87
-
88
- # File was analyzed (not cached), so persist if not fresh
89
- if not self._cache.is_fresh(file_path, mtime):
90
- self._add_to_cache(file_path, blocks)
91
-
92
- def get_duplicate_hashes(self) -> list[int]:
93
- """Get all hash values with 2+ occurrences from memory.
46
+ @property
47
+ def duplicate_hashes(self) -> list[int]:
48
+ """Hash values with 2+ occurrences from SQLite.
94
49
 
95
50
  Returns:
96
51
  List of hash values that appear in multiple blocks
97
52
  """
98
- # Always query from in-memory store for this run's files
99
- return [h for h, blocks in self._memory_store.items() if len(blocks) >= 2]
53
+ return self._cache.duplicate_hashes
100
54
 
101
55
  def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
102
- """Get all blocks with given hash value from memory.
56
+ """Get all blocks with given hash value from SQLite.
103
57
 
104
58
  Args:
105
59
  hash_value: Hash to search for
@@ -107,20 +61,4 @@ class DuplicateStorage:
107
61
  Returns:
108
62
  List of code blocks with this hash
109
63
  """
110
- # Always query from in-memory store for this run's files
111
- return self._memory_store.get(hash_value, [])
112
-
113
- def _add_to_cache(self, file_path: Path, blocks: list[CodeBlock]) -> None:
114
- """Add blocks to SQLite cache."""
115
- if not self._cache or not blocks:
116
- return
117
-
118
- mtime = file_path.stat().st_mtime
119
- self._cache.save(file_path, mtime, blocks)
120
-
121
- def _add_to_memory(self, blocks: list[CodeBlock]) -> None:
122
- """Add blocks to in-memory store."""
123
- for block in blocks:
124
- if block.hash_value not in self._memory_store:
125
- self._memory_store[block.hash_value] = []
126
- self._memory_store[block.hash_value].append(block)
64
+ return self._cache.find_duplicates_by_hash(hash_value)
@@ -1,45 +1,34 @@
1
1
  """
2
2
  Purpose: File analysis orchestration for duplicate detection
3
3
 
4
- Scope: Coordinates language-specific analyzers and cache checking
4
+ Scope: Coordinates language-specific analyzers
5
5
 
6
- Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript)
7
- and checking cache freshness. Handles cache hits by loading from cache, and cache misses by
8
- analyzing files. Separates file analysis orchestration from main linter rule logic to maintain
9
- SRP compliance.
6
+ Overview: Orchestrates file analysis by delegating to language-specific analyzers (Python, TypeScript).
7
+ Analyzes files fresh every run - no cache loading. Separates file analysis orchestration from
8
+ main linter rule logic to maintain SRP compliance.
10
9
 
11
- Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYCache, DRYConfig, CodeBlock
10
+ Dependencies: PythonDuplicateAnalyzer, TypeScriptDuplicateAnalyzer, DRYConfig, CodeBlock
12
11
 
13
12
  Exports: FileAnalyzer class
14
13
 
15
- Interfaces: FileAnalyzer.analyze_or_load(file_path, content, language, config, cache)
14
+ Interfaces: FileAnalyzer.analyze(file_path, content, language, config)
16
15
 
17
- Implementation: Delegates to language-specific analyzers, checks cache freshness
16
+ Implementation: Delegates to language-specific analyzers, always performs fresh analysis
18
17
  """
19
18
 
20
- from dataclasses import dataclass
21
19
  from pathlib import Path
22
20
 
21
+ from src.core.constants import Language
22
+
23
23
  from .block_filter import BlockFilterRegistry, create_default_registry
24
- from .cache import CodeBlock, DRYCache
24
+ from .cache import CodeBlock
25
25
  from .config import DRYConfig
26
26
  from .python_analyzer import PythonDuplicateAnalyzer
27
27
  from .typescript_analyzer import TypeScriptDuplicateAnalyzer
28
28
 
29
29
 
30
- @dataclass
31
- class FileAnalysisContext:
32
- """Context for file analysis."""
33
-
34
- file_path: Path
35
- content: str
36
- language: str
37
- config: DRYConfig
38
- cache: DRYCache | None
39
-
40
-
41
30
  class FileAnalyzer:
42
- """Orchestrates file analysis with cache support."""
31
+ """Orchestrates file analysis for duplicate detection."""
43
32
 
44
33
  def __init__(self, config: DRYConfig | None = None) -> None:
45
34
  """Initialize with language-specific analyzers.
@@ -77,51 +66,27 @@ class FileAnalyzer:
77
66
 
78
67
  return registry
79
68
 
80
- def analyze_or_load( # pylint: disable=too-many-arguments,too-many-positional-arguments
69
+ def analyze(
81
70
  self,
82
71
  file_path: Path,
83
72
  content: str,
84
73
  language: str,
85
74
  config: DRYConfig,
86
- cache: DRYCache | None = None,
87
75
  ) -> list[CodeBlock]:
88
- """Analyze file or load from cache.
76
+ """Analyze file for duplicate code blocks.
89
77
 
90
78
  Args:
91
79
  file_path: Path to file
92
80
  content: File content
93
81
  language: File language
94
82
  config: DRY configuration
95
- cache: Optional cache instance
96
83
 
97
84
  Returns:
98
85
  List of CodeBlock instances
99
86
  """
100
- # Check if file is fresh in cache
101
- if cache:
102
- mtime = file_path.stat().st_mtime
103
- if cache.is_fresh(file_path, mtime):
104
- return cache.load(file_path)
105
-
106
87
  # Analyze file based on language
107
- return self._analyze_file(file_path, content, language, config)
108
-
109
- def _analyze_file(
110
- self, file_path: Path, content: str, language: str, config: DRYConfig
111
- ) -> list[CodeBlock]:
112
- """Analyze file based on language.
113
-
114
- Args:
115
- file_path: Path to file
116
- content: File content
117
- language: File language
118
- config: DRY configuration
119
-
120
- Returns:
121
- List of CodeBlock instances
122
- """
123
- if language == "python":
88
+ if language == Language.PYTHON:
124
89
  return self._python_analyzer.analyze(file_path, content, config)
125
- if language in ("typescript", "javascript"):
90
+ if language in (Language.TYPESCRIPT, Language.JAVASCRIPT):
126
91
  return self._typescript_analyzer.analyze(file_path, content, config)
127
92
  return []
@@ -50,14 +50,11 @@ class InlineIgnoreParser:
50
50
  Returns:
51
51
  List of (start, end) tuples for ignore ranges
52
52
  """
53
- ranges = []
54
-
55
- for i, line in enumerate(lines, start=1):
56
- ignore_range = self._parse_ignore_directive(line, i, len(lines))
57
- if ignore_range:
58
- ranges.append(ignore_range)
59
-
60
- return ranges
53
+ return [
54
+ ignore_range
55
+ for i, line in enumerate(lines, start=1)
56
+ if (ignore_range := self._parse_ignore_directive(line, i, len(lines)))
57
+ ]
61
58
 
62
59
  def _parse_ignore_directive(
63
60
  self, line: str, line_num: int, total_lines: int
@@ -115,10 +112,7 @@ class InlineIgnoreParser:
115
112
  Returns:
116
113
  True if ranges overlap
117
114
  """
118
- for ign_start, ign_end in ranges:
119
- if line <= ign_end and end_line >= ign_start:
120
- return True
121
- return False
115
+ return any(line <= ign_end and end_line >= ign_start for ign_start, ign_end in ranges)
122
116
 
123
117
  def _check_single_line(self, line: int, ranges: list[tuple[int, int]]) -> bool:
124
118
  """Check if single line is in any ignore range.
@@ -130,10 +124,7 @@ class InlineIgnoreParser:
130
124
  Returns:
131
125
  True if line is in any range
132
126
  """
133
- for start, end in ranges:
134
- if start <= line <= end:
135
- return True
136
- return False
127
+ return any(start <= line <= end for start, end in ranges)
137
128
 
138
129
  def clear(self) -> None:
139
130
  """Clear all stored ignore ranges."""