thailint 0.2.0__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. src/__init__.py +1 -0
  2. src/analyzers/__init__.py +4 -3
  3. src/analyzers/ast_utils.py +54 -0
  4. src/analyzers/rust_base.py +155 -0
  5. src/analyzers/rust_context.py +141 -0
  6. src/analyzers/typescript_base.py +4 -0
  7. src/cli/__init__.py +30 -0
  8. src/cli/__main__.py +22 -0
  9. src/cli/config.py +480 -0
  10. src/cli/config_merge.py +241 -0
  11. src/cli/linters/__init__.py +67 -0
  12. src/cli/linters/code_patterns.py +270 -0
  13. src/cli/linters/code_smells.py +342 -0
  14. src/cli/linters/documentation.py +83 -0
  15. src/cli/linters/performance.py +287 -0
  16. src/cli/linters/shared.py +331 -0
  17. src/cli/linters/structure.py +327 -0
  18. src/cli/linters/structure_quality.py +328 -0
  19. src/cli/main.py +120 -0
  20. src/cli/utils.py +395 -0
  21. src/cli_main.py +37 -0
  22. src/config.py +44 -27
  23. src/core/base.py +95 -5
  24. src/core/cli_utils.py +19 -2
  25. src/core/config_parser.py +36 -6
  26. src/core/constants.py +54 -0
  27. src/core/linter_utils.py +95 -6
  28. src/core/python_lint_rule.py +101 -0
  29. src/core/registry.py +1 -1
  30. src/core/rule_discovery.py +147 -84
  31. src/core/types.py +13 -0
  32. src/core/violation_builder.py +78 -15
  33. src/core/violation_utils.py +69 -0
  34. src/formatters/__init__.py +22 -0
  35. src/formatters/sarif.py +202 -0
  36. src/linter_config/directive_markers.py +109 -0
  37. src/linter_config/ignore.py +254 -395
  38. src/linter_config/loader.py +45 -12
  39. src/linter_config/pattern_utils.py +65 -0
  40. src/linter_config/rule_matcher.py +89 -0
  41. src/linters/collection_pipeline/__init__.py +90 -0
  42. src/linters/collection_pipeline/any_all_analyzer.py +281 -0
  43. src/linters/collection_pipeline/ast_utils.py +40 -0
  44. src/linters/collection_pipeline/config.py +75 -0
  45. src/linters/collection_pipeline/continue_analyzer.py +94 -0
  46. src/linters/collection_pipeline/detector.py +360 -0
  47. src/linters/collection_pipeline/filter_map_analyzer.py +402 -0
  48. src/linters/collection_pipeline/linter.py +420 -0
  49. src/linters/collection_pipeline/suggestion_builder.py +130 -0
  50. src/linters/cqs/__init__.py +54 -0
  51. src/linters/cqs/config.py +55 -0
  52. src/linters/cqs/function_analyzer.py +201 -0
  53. src/linters/cqs/input_detector.py +139 -0
  54. src/linters/cqs/linter.py +159 -0
  55. src/linters/cqs/output_detector.py +84 -0
  56. src/linters/cqs/python_analyzer.py +54 -0
  57. src/linters/cqs/types.py +82 -0
  58. src/linters/cqs/typescript_cqs_analyzer.py +61 -0
  59. src/linters/cqs/typescript_function_analyzer.py +192 -0
  60. src/linters/cqs/typescript_input_detector.py +203 -0
  61. src/linters/cqs/typescript_output_detector.py +117 -0
  62. src/linters/cqs/violation_builder.py +94 -0
  63. src/linters/dry/base_token_analyzer.py +16 -9
  64. src/linters/dry/block_filter.py +125 -22
  65. src/linters/dry/block_grouper.py +4 -0
  66. src/linters/dry/cache.py +142 -94
  67. src/linters/dry/cache_query.py +4 -0
  68. src/linters/dry/config.py +68 -21
  69. src/linters/dry/constant.py +92 -0
  70. src/linters/dry/constant_matcher.py +223 -0
  71. src/linters/dry/constant_violation_builder.py +98 -0
  72. src/linters/dry/duplicate_storage.py +20 -82
  73. src/linters/dry/file_analyzer.py +15 -50
  74. src/linters/dry/inline_ignore.py +7 -16
  75. src/linters/dry/linter.py +182 -54
  76. src/linters/dry/python_analyzer.py +108 -336
  77. src/linters/dry/python_constant_extractor.py +100 -0
  78. src/linters/dry/single_statement_detector.py +417 -0
  79. src/linters/dry/storage_initializer.py +9 -18
  80. src/linters/dry/token_hasher.py +129 -71
  81. src/linters/dry/typescript_analyzer.py +68 -380
  82. src/linters/dry/typescript_constant_extractor.py +138 -0
  83. src/linters/dry/typescript_statement_detector.py +255 -0
  84. src/linters/dry/typescript_value_extractor.py +70 -0
  85. src/linters/dry/violation_builder.py +4 -0
  86. src/linters/dry/violation_filter.py +9 -5
  87. src/linters/dry/violation_generator.py +71 -14
  88. src/linters/file_header/__init__.py +24 -0
  89. src/linters/file_header/atemporal_detector.py +105 -0
  90. src/linters/file_header/base_parser.py +93 -0
  91. src/linters/file_header/bash_parser.py +66 -0
  92. src/linters/file_header/config.py +140 -0
  93. src/linters/file_header/css_parser.py +70 -0
  94. src/linters/file_header/field_validator.py +72 -0
  95. src/linters/file_header/linter.py +309 -0
  96. src/linters/file_header/markdown_parser.py +130 -0
  97. src/linters/file_header/python_parser.py +42 -0
  98. src/linters/file_header/typescript_parser.py +73 -0
  99. src/linters/file_header/violation_builder.py +79 -0
  100. src/linters/file_placement/config_loader.py +3 -1
  101. src/linters/file_placement/directory_matcher.py +4 -0
  102. src/linters/file_placement/linter.py +74 -31
  103. src/linters/file_placement/pattern_matcher.py +41 -6
  104. src/linters/file_placement/pattern_validator.py +31 -12
  105. src/linters/file_placement/rule_checker.py +12 -7
  106. src/linters/lazy_ignores/__init__.py +43 -0
  107. src/linters/lazy_ignores/config.py +74 -0
  108. src/linters/lazy_ignores/directive_utils.py +164 -0
  109. src/linters/lazy_ignores/header_parser.py +177 -0
  110. src/linters/lazy_ignores/linter.py +158 -0
  111. src/linters/lazy_ignores/matcher.py +168 -0
  112. src/linters/lazy_ignores/python_analyzer.py +209 -0
  113. src/linters/lazy_ignores/rule_id_utils.py +180 -0
  114. src/linters/lazy_ignores/skip_detector.py +298 -0
  115. src/linters/lazy_ignores/types.py +71 -0
  116. src/linters/lazy_ignores/typescript_analyzer.py +146 -0
  117. src/linters/lazy_ignores/violation_builder.py +135 -0
  118. src/linters/lbyl/__init__.py +31 -0
  119. src/linters/lbyl/config.py +63 -0
  120. src/linters/lbyl/linter.py +67 -0
  121. src/linters/lbyl/pattern_detectors/__init__.py +53 -0
  122. src/linters/lbyl/pattern_detectors/base.py +63 -0
  123. src/linters/lbyl/pattern_detectors/dict_key_detector.py +107 -0
  124. src/linters/lbyl/pattern_detectors/division_check_detector.py +232 -0
  125. src/linters/lbyl/pattern_detectors/file_exists_detector.py +220 -0
  126. src/linters/lbyl/pattern_detectors/hasattr_detector.py +119 -0
  127. src/linters/lbyl/pattern_detectors/isinstance_detector.py +119 -0
  128. src/linters/lbyl/pattern_detectors/len_check_detector.py +173 -0
  129. src/linters/lbyl/pattern_detectors/none_check_detector.py +146 -0
  130. src/linters/lbyl/pattern_detectors/string_validator_detector.py +145 -0
  131. src/linters/lbyl/python_analyzer.py +215 -0
  132. src/linters/lbyl/violation_builder.py +354 -0
  133. src/linters/magic_numbers/__init__.py +48 -0
  134. src/linters/magic_numbers/config.py +82 -0
  135. src/linters/magic_numbers/context_analyzer.py +249 -0
  136. src/linters/magic_numbers/linter.py +462 -0
  137. src/linters/magic_numbers/python_analyzer.py +64 -0
  138. src/linters/magic_numbers/typescript_analyzer.py +215 -0
  139. src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
  140. src/linters/magic_numbers/violation_builder.py +98 -0
  141. src/linters/method_property/__init__.py +49 -0
  142. src/linters/method_property/config.py +138 -0
  143. src/linters/method_property/linter.py +414 -0
  144. src/linters/method_property/python_analyzer.py +473 -0
  145. src/linters/method_property/violation_builder.py +119 -0
  146. src/linters/nesting/__init__.py +6 -2
  147. src/linters/nesting/config.py +6 -3
  148. src/linters/nesting/linter.py +31 -34
  149. src/linters/nesting/python_analyzer.py +4 -0
  150. src/linters/nesting/typescript_analyzer.py +6 -11
  151. src/linters/nesting/violation_builder.py +1 -0
  152. src/linters/performance/__init__.py +91 -0
  153. src/linters/performance/config.py +43 -0
  154. src/linters/performance/constants.py +49 -0
  155. src/linters/performance/linter.py +149 -0
  156. src/linters/performance/python_analyzer.py +365 -0
  157. src/linters/performance/regex_analyzer.py +312 -0
  158. src/linters/performance/regex_linter.py +139 -0
  159. src/linters/performance/typescript_analyzer.py +236 -0
  160. src/linters/performance/violation_builder.py +160 -0
  161. src/linters/print_statements/__init__.py +53 -0
  162. src/linters/print_statements/config.py +78 -0
  163. src/linters/print_statements/linter.py +413 -0
  164. src/linters/print_statements/python_analyzer.py +153 -0
  165. src/linters/print_statements/typescript_analyzer.py +125 -0
  166. src/linters/print_statements/violation_builder.py +96 -0
  167. src/linters/srp/__init__.py +3 -3
  168. src/linters/srp/class_analyzer.py +11 -7
  169. src/linters/srp/config.py +12 -6
  170. src/linters/srp/heuristics.py +56 -22
  171. src/linters/srp/linter.py +47 -39
  172. src/linters/srp/python_analyzer.py +55 -20
  173. src/linters/srp/typescript_metrics_calculator.py +110 -50
  174. src/linters/stateless_class/__init__.py +25 -0
  175. src/linters/stateless_class/config.py +58 -0
  176. src/linters/stateless_class/linter.py +349 -0
  177. src/linters/stateless_class/python_analyzer.py +290 -0
  178. src/linters/stringly_typed/__init__.py +36 -0
  179. src/linters/stringly_typed/config.py +189 -0
  180. src/linters/stringly_typed/context_filter.py +451 -0
  181. src/linters/stringly_typed/function_call_violation_builder.py +135 -0
  182. src/linters/stringly_typed/ignore_checker.py +100 -0
  183. src/linters/stringly_typed/ignore_utils.py +51 -0
  184. src/linters/stringly_typed/linter.py +376 -0
  185. src/linters/stringly_typed/python/__init__.py +33 -0
  186. src/linters/stringly_typed/python/analyzer.py +348 -0
  187. src/linters/stringly_typed/python/call_tracker.py +175 -0
  188. src/linters/stringly_typed/python/comparison_tracker.py +257 -0
  189. src/linters/stringly_typed/python/condition_extractor.py +134 -0
  190. src/linters/stringly_typed/python/conditional_detector.py +179 -0
  191. src/linters/stringly_typed/python/constants.py +21 -0
  192. src/linters/stringly_typed/python/match_analyzer.py +94 -0
  193. src/linters/stringly_typed/python/validation_detector.py +189 -0
  194. src/linters/stringly_typed/python/variable_extractor.py +96 -0
  195. src/linters/stringly_typed/storage.py +620 -0
  196. src/linters/stringly_typed/storage_initializer.py +45 -0
  197. src/linters/stringly_typed/typescript/__init__.py +28 -0
  198. src/linters/stringly_typed/typescript/analyzer.py +157 -0
  199. src/linters/stringly_typed/typescript/call_tracker.py +335 -0
  200. src/linters/stringly_typed/typescript/comparison_tracker.py +378 -0
  201. src/linters/stringly_typed/violation_generator.py +419 -0
  202. src/orchestrator/core.py +264 -16
  203. src/orchestrator/language_detector.py +5 -3
  204. src/templates/thailint_config_template.yaml +354 -0
  205. src/utils/project_root.py +138 -16
  206. thailint-0.15.3.dist-info/METADATA +187 -0
  207. thailint-0.15.3.dist-info/RECORD +226 -0
  208. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info}/WHEEL +1 -1
  209. thailint-0.15.3.dist-info/entry_points.txt +4 -0
  210. src/cli.py +0 -1055
  211. thailint-0.2.0.dist-info/METADATA +0 -980
  212. thailint-0.2.0.dist-info/RECORD +0 -75
  213. thailint-0.2.0.dist-info/entry_points.txt +0 -4
  214. {thailint-0.2.0.dist-info → thailint-0.15.3.dist-info/licenses}/LICENSE +0 -0
src/linters/dry/cache.py CHANGED
@@ -1,31 +1,45 @@
1
1
  """
2
- Purpose: SQLite cache manager for DRY linter with mtime-based invalidation
2
+ Purpose: SQLite storage manager for DRY linter duplicate detection
3
3
 
4
- Scope: Code block storage, cache operations, and duplicate detection queries
4
+ Scope: Code block storage, constant storage, and duplicate detection queries
5
5
 
6
- Overview: Implements persistent caching layer for duplicate code detection using SQLite database.
7
- Stores code blocks with hash values, file locations, and metadata. Provides mtime-based cache
8
- invalidation to detect stale entries. Serves dual purpose as both cache (avoid re-hashing) and
9
- hash table (query duplicates across project). Includes indexes for fast hash lookups enabling
10
- cross-file duplicate detection with minimal overhead.
6
+ Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection
7
+ and duplicate constants detection. Stores code blocks with hash values and constants with
8
+ name/value pairs, enabling cross-file duplicate detection during a single linter run.
9
+ Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
10
+ No persistence between runs - storage is cleared when linter completes. Includes indexes for
11
+ fast hash lookups and constant name lookups enabling efficient cross-file detection.
11
12
 
12
- Dependencies: Python sqlite3 module (stdlib), pathlib.Path, dataclasses
13
+ Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
13
14
 
14
15
  Exports: CodeBlock dataclass, DRYCache class
15
16
 
16
- Interfaces: DRYCache.__init__, is_fresh, load, save, find_duplicates_by_hash, get_blocks_for_file,
17
- add_blocks, cleanup_stale, close
17
+ Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
18
+ find_duplicates_by_hash(hash_value), duplicate_hashes, add_constants(file_path, constants),
19
+ all_constants, get_duplicate_constant_names(), close()
18
20
 
19
- Implementation: SQLite with two tables (files, code_blocks), indexed on hash_value for performance,
20
- ACID transactions for reliability, foreign key constraints for data integrity
21
+ Implementation: SQLite with three tables (files, code_blocks, constants), indexed for performance,
22
+ storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
23
+
24
+ Suppressions:
25
+ - consider-using-with: Tempfile managed by class lifecycle, not context manager
21
26
  """
22
27
 
28
+ from __future__ import annotations
29
+
23
30
  import sqlite3
31
+ import tempfile
24
32
  from dataclasses import dataclass
25
33
  from pathlib import Path
34
+ from typing import TYPE_CHECKING
35
+
36
+ from src.core.constants import StorageMode
26
37
 
27
38
  from .cache_query import CacheQueryService
28
39
 
40
+ if TYPE_CHECKING:
41
+ from .constant import ConstantInfo
42
+
29
43
 
30
44
  @dataclass
31
45
  class CodeBlock:
@@ -39,20 +53,32 @@ class CodeBlock:
39
53
 
40
54
 
41
55
  class DRYCache:
42
- """SQLite-backed cache for duplicate detection."""
56
+ """SQLite-backed storage for duplicate detection."""
43
57
 
44
58
  SCHEMA_VERSION = 1
45
59
 
46
- def __init__(self, cache_path: Path) -> None:
47
- """Initialize cache with SQLite database.
60
+ def __init__(self, storage_mode: str = "memory") -> None:
61
+ """Initialize storage with SQLite database.
48
62
 
49
63
  Args:
50
- cache_path: Path to SQLite database file
64
+ storage_mode: Storage mode - "memory" (default) or "tempfile"
51
65
  """
52
- # Ensure parent directory exists
53
- cache_path.parent.mkdir(parents=True, exist_ok=True)
66
+ self._storage_mode = storage_mode
67
+ self._tempfile = None
68
+
69
+ # Create SQLite connection based on storage mode
70
+ if storage_mode == StorageMode.MEMORY:
71
+ self.db = sqlite3.connect(":memory:")
72
+ elif storage_mode == StorageMode.TEMPFILE:
73
+ # Create temporary file that auto-deletes on close
74
+ # pylint: disable=consider-using-with
75
+ # Justification: tempfile must remain open for SQLite connection lifetime.
76
+ # It is explicitly closed in close() method when cache is finalized.
77
+ self._tempfile = tempfile.NamedTemporaryFile(suffix=".db", delete=True)
78
+ self.db = sqlite3.connect(self._tempfile.name)
79
+ else:
80
+ raise ValueError(f"Invalid storage_mode: {storage_mode}")
54
81
 
55
- self.db = sqlite3.connect(str(cache_path))
56
82
  self._query_service = CacheQueryService()
57
83
 
58
84
  # Create schema
@@ -80,70 +106,39 @@ class DRYCache:
80
106
  self.db.execute("CREATE INDEX IF NOT EXISTS idx_hash_value ON code_blocks(hash_value)")
81
107
  self.db.execute("CREATE INDEX IF NOT EXISTS idx_file_path ON code_blocks(file_path)")
82
108
 
83
- self.db.commit()
84
-
85
- def is_fresh(self, file_path: Path, current_mtime: float) -> bool:
86
- """Check if cached data is fresh (mtime matches).
87
-
88
- Args:
89
- file_path: Path to file
90
- current_mtime: Current modification time
91
-
92
- Returns:
93
- True if cache is fresh, False if stale or missing
94
- """
95
- cursor = self.db.execute("SELECT mtime FROM files WHERE file_path = ?", (str(file_path),))
96
- row = cursor.fetchone()
97
-
98
- if not row:
99
- return False # Not in cache
100
-
101
- cached_mtime = row[0]
102
- return cached_mtime == current_mtime
103
-
104
- def load(self, file_path: Path) -> list[CodeBlock]:
105
- """Load cached code blocks for file.
106
-
107
- Args:
108
- file_path: Path to file
109
-
110
- Returns:
111
- List of CodeBlock instances from cache
112
- """
113
- cursor = self.db.execute(
114
- """SELECT hash_value, start_line, end_line, snippet
115
- FROM code_blocks
116
- WHERE file_path = ?""",
117
- (str(file_path),),
109
+ # Constants table for duplicate constant detection
110
+ self.db.execute(
111
+ """CREATE TABLE IF NOT EXISTS constants (
112
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
113
+ file_path TEXT NOT NULL,
114
+ name TEXT NOT NULL,
115
+ line_number INTEGER NOT NULL,
116
+ value TEXT,
117
+ FOREIGN KEY (file_path) REFERENCES files(file_path) ON DELETE CASCADE
118
+ )"""
118
119
  )
120
+ self.db.execute("CREATE INDEX IF NOT EXISTS idx_constant_name ON constants(name)")
119
121
 
120
- blocks = []
121
- for hash_val, start, end, snippet in cursor:
122
- block = CodeBlock(
123
- file_path=file_path,
124
- start_line=start,
125
- end_line=end,
126
- snippet=snippet,
127
- hash_value=hash_val,
128
- )
129
- blocks.append(block)
130
-
131
- return blocks
122
+ self.db.commit()
132
123
 
133
- def save(self, file_path: Path, mtime: float, blocks: list[CodeBlock]) -> None:
134
- """Save code blocks to cache.
124
+ def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
125
+ """Add code blocks to storage.
135
126
 
136
127
  Args:
137
- file_path: Path to file
138
- mtime: File modification time
139
- blocks: List of CodeBlock instances to cache
128
+ file_path: Path to source file
129
+ blocks: List of CodeBlock instances to store
140
130
  """
141
- # Delete old data for this file
142
- self.db.execute("DELETE FROM files WHERE file_path = ?", (str(file_path),))
131
+ if not blocks:
132
+ return
143
133
 
144
134
  # Insert file metadata
135
+ try:
136
+ mtime = file_path.stat().st_mtime
137
+ except OSError:
138
+ mtime = 0.0 # File doesn't exist, use placeholder
139
+
145
140
  self.db.execute(
146
- "INSERT INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
141
+ "INSERT OR REPLACE INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
147
142
  (str(file_path), mtime, len(blocks)),
148
143
  )
149
144
 
@@ -164,23 +159,6 @@ class DRYCache:
164
159
 
165
160
  self.db.commit()
166
161
 
167
- def cleanup_stale(self, max_age_days: int) -> None:
168
- """Remove cache entries older than max_age_days.
169
-
170
- Args:
171
- max_age_days: Maximum age in days for cache entries
172
- """
173
- # Use parameterized query to prevent SQL injection
174
- self.db.execute(
175
- """DELETE FROM files
176
- WHERE last_scanned < datetime('now', ? || ' days')""",
177
- (f"-{max_age_days}",),
178
- )
179
-
180
- # Vacuum to reclaim space
181
- self.db.execute("VACUUM")
182
- self.db.commit()
183
-
184
162
  def find_duplicates_by_hash(self, hash_value: int) -> list[CodeBlock]:
185
163
  """Find all code blocks with the given hash value.
186
164
 
@@ -205,14 +183,84 @@ class DRYCache:
205
183
 
206
184
  return blocks
207
185
 
208
- def get_duplicate_hashes(self) -> list[int]:
209
- """Get all hash values that appear 2+ times.
186
+ @property
187
+ def duplicate_hashes(self) -> list[int]:
188
+ """Hash values that appear 2+ times.
210
189
 
211
190
  Returns:
212
191
  List of hash values with 2 or more occurrences
213
192
  """
214
193
  return self._query_service.get_duplicate_hashes(self.db)
215
194
 
195
+ def add_constants(
196
+ self,
197
+ file_path: Path,
198
+ constants: list[ConstantInfo],
199
+ ) -> None:
200
+ """Add constants to storage.
201
+
202
+ Args:
203
+ file_path: Path to source file
204
+ constants: List of ConstantInfo instances to store
205
+ """
206
+ if not constants:
207
+ return
208
+
209
+ for const in constants:
210
+ self.db.execute(
211
+ """INSERT INTO constants
212
+ (file_path, name, line_number, value)
213
+ VALUES (?, ?, ?, ?)""",
214
+ (
215
+ str(file_path),
216
+ const.name,
217
+ const.line_number,
218
+ const.value,
219
+ ),
220
+ )
221
+
222
+ self.db.commit()
223
+
224
+ @property
225
+ def all_constants(self) -> list[tuple[str, str, int, str | None]]:
226
+ """All constants from storage.
227
+
228
+ Returns:
229
+ List of tuples: (file_path, name, line_number, value)
230
+ """
231
+ cursor = self.db.execute("SELECT file_path, name, line_number, value FROM constants")
232
+ return cursor.fetchall()
233
+
234
+ def get_duplicate_constant_names(self) -> list[str]:
235
+ """Get constant names that appear in 2+ files.
236
+
237
+ Returns:
238
+ List of constant names appearing in multiple files
239
+ """
240
+ cursor = self.db.execute(
241
+ """SELECT name FROM constants
242
+ GROUP BY name
243
+ HAVING COUNT(DISTINCT file_path) >= 2"""
244
+ )
245
+ return [row[0] for row in cursor.fetchall()]
246
+
247
+ def get_constants_by_name(self, name: str) -> list[tuple[str, int, str | None]]:
248
+ """Get all locations of a constant by name.
249
+
250
+ Args:
251
+ name: The constant name to search for
252
+
253
+ Returns:
254
+ List of tuples: (file_path, line_number, value)
255
+ """
256
+ cursor = self.db.execute(
257
+ "SELECT file_path, line_number, value FROM constants WHERE name = ?",
258
+ (name,),
259
+ )
260
+ return cursor.fetchall()
261
+
216
262
  def close(self) -> None:
217
- """Close database connection."""
263
+ """Close database connection and cleanup tempfile if used."""
218
264
  self.db.close()
265
+ if self._tempfile:
266
+ self._tempfile.close()
src/linters/dry/cache_query.py CHANGED
@@ -22,6 +22,10 @@ import sqlite3
22
22
  class CacheQueryService:
23
23
  """Handles cache database queries."""
24
24
 
25
+ def __init__(self) -> None:
26
+ """Initialize the cache query service."""
27
+ pass # Stateless query service for database operations
28
+
25
29
  def get_duplicate_hashes(self, db: sqlite3.Connection) -> list[int]:
26
30
  """Get all hash values that appear 2+ times.
27
31
 
src/linters/dry/config.py CHANGED
@@ -15,11 +15,21 @@ Exports: DRYConfig dataclass
15
15
  Interfaces: DRYConfig.__init__, DRYConfig.from_dict(config: dict) -> DRYConfig
16
16
 
17
17
  Implementation: Dataclass with field defaults, __post_init__ validation, and dict-based construction
18
+
19
+ Suppressions:
20
+ - too-many-instance-attributes: Configuration dataclass with related settings
18
21
  """
19
22
 
20
23
  from dataclasses import dataclass, field
21
24
  from typing import Any
22
25
 
26
+ from src.core.constants import StorageMode
27
+
28
+ # Default configuration constants
29
+ DEFAULT_MIN_DUPLICATE_LINES = 3
30
+ DEFAULT_MIN_DUPLICATE_TOKENS = 30
31
+ DEFAULT_DETECT_DUPLICATE_CONSTANTS = True
32
+
23
33
 
24
34
  @dataclass
25
35
  class DRYConfig: # pylint: disable=too-many-instance-attributes
@@ -27,14 +37,14 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
27
37
 
28
38
  Note: Pylint too-many-instance-attributes disabled. This is a configuration
29
39
  dataclass serving as a data container for related DRY linter settings.
30
- All 12 attributes are cohesively related (detection thresholds, language
31
- overrides, caching, filtering). Splitting would reduce cohesion and make
40
+ All attributes are cohesively related (detection thresholds, language
41
+ overrides, storage mode, filtering). Splitting would reduce cohesion and make
32
42
  configuration loading more complex without meaningful benefit.
33
43
  """
34
44
 
35
45
  enabled: bool = False # Must be explicitly enabled
36
- min_duplicate_lines: int = 3
37
- min_duplicate_tokens: int = 30
46
+ min_duplicate_lines: int = DEFAULT_MIN_DUPLICATE_LINES
47
+ min_duplicate_tokens: int = DEFAULT_MIN_DUPLICATE_TOKENS
38
48
  min_occurrences: int = 2 # Minimum occurrences to report (default: 2)
39
49
 
40
50
  # Language-specific overrides
@@ -42,10 +52,8 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
42
52
  typescript_min_occurrences: int | None = None
43
53
  javascript_min_occurrences: int | None = None
44
54
 
45
- # Cache settings
46
- cache_enabled: bool = True # ON by default for performance
47
- cache_path: str = ".thailint-cache/dry.db"
48
- cache_max_age_days: int = 30
55
+ # Storage settings
56
+ storage_mode: str = "memory" # Options: "memory" (default) or "tempfile"
49
57
 
50
58
  # Ignore patterns
51
59
  ignore_patterns: list[str] = field(default_factory=lambda: ["tests/", "__init__.py"])
@@ -58,18 +66,34 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
58
66
  }
59
67
  )
60
68
 
69
+ # Duplicate constants detection
70
+ detect_duplicate_constants: bool = DEFAULT_DETECT_DUPLICATE_CONSTANTS
71
+ min_constant_occurrences: int = 2 # Minimum files with same constant to report
72
+
73
+ # Language-specific overrides for constant detection
74
+ python_min_constant_occurrences: int | None = None
75
+ typescript_min_constant_occurrences: int | None = None
76
+
61
77
  def __post_init__(self) -> None:
62
78
  """Validate configuration values."""
63
- if self.min_duplicate_lines <= 0:
79
+ self._validate_positive_fields()
80
+ valid_modes = (StorageMode.MEMORY, StorageMode.TEMPFILE)
81
+ if self.storage_mode not in valid_modes:
64
82
  raise ValueError(
65
- f"min_duplicate_lines must be positive, got {self.min_duplicate_lines}"
83
+ f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
66
84
  )
67
- if self.min_duplicate_tokens <= 0:
68
- raise ValueError(
69
- f"min_duplicate_tokens must be positive, got {self.min_duplicate_tokens}"
70
- )
71
- if self.min_occurrences <= 0:
72
- raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
85
+
86
+ def _validate_positive_fields(self) -> None:
87
+ """Validate that required fields are positive."""
88
+ positive_fields = [
89
+ ("min_duplicate_lines", self.min_duplicate_lines),
90
+ ("min_duplicate_tokens", self.min_duplicate_tokens),
91
+ ("min_occurrences", self.min_occurrences),
92
+ ("min_constant_occurrences", self.min_constant_occurrences),
93
+ ]
94
+ for name, value in positive_fields:
95
+ if value <= 0:
96
+ raise ValueError(f"{name} must be positive, got {value}")
73
97
 
74
98
  def get_min_occurrences_for_language(self, language: str) -> int:
75
99
  """Get minimum occurrences threshold for a specific language.
@@ -91,6 +115,25 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
91
115
  override = language_overrides.get(language_lower)
92
116
  return override if override is not None else self.min_occurrences
93
117
 
118
+ def get_min_constant_occurrences_for_language(self, language: str) -> int:
119
+ """Get minimum constant occurrences threshold for a specific language.
120
+
121
+ Args:
122
+ language: Language identifier (e.g., "python", "typescript")
123
+
124
+ Returns:
125
+ Minimum constant occurrences threshold for the language, or global default
126
+ """
127
+ language_lower = language.lower()
128
+
129
+ language_overrides = {
130
+ "python": self.python_min_constant_occurrences,
131
+ "typescript": self.typescript_min_constant_occurrences,
132
+ }
133
+
134
+ override = language_overrides.get(language_lower)
135
+ return override if override is not None else self.min_constant_occurrences
136
+
94
137
  @classmethod
95
138
  def from_dict(cls, config: dict[str, Any]) -> "DRYConfig":
96
139
  """Load configuration from dictionary.
@@ -116,15 +159,19 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
116
159
 
117
160
  return cls(
118
161
  enabled=config.get("enabled", False),
119
- min_duplicate_lines=config.get("min_duplicate_lines", 3),
120
- min_duplicate_tokens=config.get("min_duplicate_tokens", 30),
162
+ min_duplicate_lines=config.get("min_duplicate_lines", DEFAULT_MIN_DUPLICATE_LINES),
163
+ min_duplicate_tokens=config.get("min_duplicate_tokens", DEFAULT_MIN_DUPLICATE_TOKENS),
121
164
  min_occurrences=config.get("min_occurrences", 2),
122
165
  python_min_occurrences=python_config.get("min_occurrences"),
123
166
  typescript_min_occurrences=typescript_config.get("min_occurrences"),
124
167
  javascript_min_occurrences=javascript_config.get("min_occurrences"),
125
- cache_enabled=config.get("cache_enabled", True),
126
- cache_path=config.get("cache_path", ".thailint-cache/dry.db"),
127
- cache_max_age_days=config.get("cache_max_age_days", 30),
168
+ storage_mode=config.get("storage_mode", "memory"),
128
169
  ignore_patterns=config.get("ignore", []),
129
170
  filters=filters,
171
+ detect_duplicate_constants=config.get(
172
+ "detect_duplicate_constants", DEFAULT_DETECT_DUPLICATE_CONSTANTS
173
+ ),
174
+ min_constant_occurrences=config.get("min_constant_occurrences", 2),
175
+ python_min_constant_occurrences=python_config.get("min_constant_occurrences"),
176
+ typescript_min_constant_occurrences=typescript_config.get("min_constant_occurrences"),
130
177
  )
src/linters/dry/constant.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ Purpose: Dataclasses for duplicate constants detection in DRY linter
3
+
4
+ Scope: Data structures for constant extraction and cross-file detection
5
+
6
+ Overview: Provides dataclasses for representing constants extracted from source code and their
7
+ locations across multiple files. ConstantInfo stores extracted constant metadata (name, line,
8
+ value) from a single file. ConstantLocation represents where a constant appears across the
9
+ project. ConstantGroup represents a group of related constants (exact or fuzzy matches) for
10
+ violation reporting. These structures support the duplicate constants detection feature that
11
+ identifies when the same constant name appears in multiple files.
12
+
13
+ Dependencies: Python dataclasses module, pathlib for Path types
14
+
15
+ Exports: ConstantInfo, ConstantLocation, ConstantGroup dataclasses
16
+
17
+ Interfaces: Dataclass constructors with named fields
18
+
19
+ Implementation: Immutable dataclasses with optional fields for extracted value context
20
+ """
21
+
22
+ import re
23
+ from dataclasses import dataclass, field
24
+ from pathlib import Path
25
+
26
+ # Shared pattern for ALL_CAPS constant names (public only, no leading underscore)
27
+ # Used by both Python and TypeScript constant extractors
28
+ # Requires at least 2 characters to exclude single-letter type params (P, T, K, V)
29
+ CONSTANT_NAME_PATTERN = re.compile(r"^[A-Z][A-Z0-9_]+$")
30
+
31
+
32
+ @dataclass
33
+ class ConstantInfo:
34
+ """Information about a constant extracted from source code.
35
+
36
+ Represents a single constant definition found during file analysis.
37
+ Used during the collection phase before cross-file matching.
38
+ """
39
+
40
+ name: str # Constant name (e.g., "API_TIMEOUT")
41
+ line_number: int # Line where constant is defined
42
+ value: str | None = None # Optional: the value (for violation message context)
43
+
44
+
45
+ @dataclass
46
+ class ConstantLocation:
47
+ """Location of a constant in the project.
48
+
49
+ Represents where a specific constant appears, including file path,
50
+ line number, and the value assigned. Used for cross-file reporting.
51
+ """
52
+
53
+ file_path: Path
54
+ line_number: int
55
+ name: str
56
+ value: str | None = None
57
+
58
+
59
+ @dataclass
60
+ class ConstantGroup:
61
+ """A group of related constants for violation reporting.
62
+
63
+ Groups constants that match (either exactly or via fuzzy matching)
64
+ across multiple files. Used by the violation builder to generate
65
+ comprehensive violation messages.
66
+ """
67
+
68
+ # The canonical name (first seen or most common)
69
+ canonical_name: str
70
+
71
+ # All locations where this constant (or fuzzy match) appears
72
+ locations: list[ConstantLocation] = field(default_factory=list)
73
+
74
+ # All names in this group (for fuzzy matches, may include variants)
75
+ all_names: set[str] = field(default_factory=set)
76
+
77
+ # Whether this is a fuzzy match (True) or exact match (False)
78
+ is_fuzzy_match: bool = False
79
+
80
+ def add_location(self, location: ConstantLocation) -> None:
81
+ """Add a location to this group.
82
+
83
+ Args:
84
+ location: The constant location to add
85
+ """
86
+ self.locations.append(location)
87
+ self.all_names.add(location.name)
88
+
89
+ @property
90
+ def file_count(self) -> int:
91
+ """Number of unique files containing this constant."""
92
+ return len({loc.file_path for loc in self.locations})