thailint 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +1 -0
- src/cli/__init__.py +27 -0
- src/cli/__main__.py +22 -0
- src/cli/config.py +478 -0
- src/cli/linters/__init__.py +58 -0
- src/cli/linters/code_patterns.py +372 -0
- src/cli/linters/code_smells.py +450 -0
- src/cli/linters/documentation.py +155 -0
- src/cli/linters/shared.py +89 -0
- src/cli/linters/structure.py +313 -0
- src/cli/linters/structure_quality.py +316 -0
- src/cli/main.py +120 -0
- src/cli/utils.py +395 -0
- src/cli_main.py +34 -0
- src/core/types.py +13 -0
- src/core/violation_utils.py +69 -0
- src/linter_config/ignore.py +32 -16
- src/linters/collection_pipeline/linter.py +2 -2
- src/linters/dry/block_filter.py +97 -1
- src/linters/dry/cache.py +94 -6
- src/linters/dry/config.py +47 -10
- src/linters/dry/constant.py +92 -0
- src/linters/dry/constant_matcher.py +214 -0
- src/linters/dry/constant_violation_builder.py +98 -0
- src/linters/dry/linter.py +89 -48
- src/linters/dry/python_analyzer.py +12 -415
- src/linters/dry/python_constant_extractor.py +101 -0
- src/linters/dry/single_statement_detector.py +415 -0
- src/linters/dry/token_hasher.py +5 -5
- src/linters/dry/typescript_analyzer.py +5 -354
- src/linters/dry/typescript_constant_extractor.py +134 -0
- src/linters/dry/typescript_statement_detector.py +255 -0
- src/linters/dry/typescript_value_extractor.py +66 -0
- src/linters/file_header/linter.py +2 -2
- src/linters/file_placement/linter.py +2 -2
- src/linters/file_placement/pattern_matcher.py +19 -5
- src/linters/magic_numbers/linter.py +8 -67
- src/linters/magic_numbers/typescript_ignore_checker.py +81 -0
- src/linters/nesting/linter.py +12 -9
- src/linters/print_statements/linter.py +7 -24
- src/linters/srp/class_analyzer.py +9 -9
- src/linters/srp/heuristics.py +2 -2
- src/linters/srp/linter.py +2 -2
- src/linters/stateless_class/linter.py +2 -2
- src/linters/stringly_typed/__init__.py +36 -0
- src/linters/stringly_typed/config.py +190 -0
- src/linters/stringly_typed/context_filter.py +451 -0
- src/linters/stringly_typed/function_call_violation_builder.py +137 -0
- src/linters/stringly_typed/ignore_checker.py +102 -0
- src/linters/stringly_typed/ignore_utils.py +51 -0
- src/linters/stringly_typed/linter.py +344 -0
- src/linters/stringly_typed/python/__init__.py +33 -0
- src/linters/stringly_typed/python/analyzer.py +344 -0
- src/linters/stringly_typed/python/call_tracker.py +172 -0
- src/linters/stringly_typed/python/comparison_tracker.py +252 -0
- src/linters/stringly_typed/python/condition_extractor.py +131 -0
- src/linters/stringly_typed/python/conditional_detector.py +176 -0
- src/linters/stringly_typed/python/constants.py +21 -0
- src/linters/stringly_typed/python/match_analyzer.py +88 -0
- src/linters/stringly_typed/python/validation_detector.py +186 -0
- src/linters/stringly_typed/python/variable_extractor.py +96 -0
- src/linters/stringly_typed/storage.py +630 -0
- src/linters/stringly_typed/storage_initializer.py +45 -0
- src/linters/stringly_typed/typescript/__init__.py +28 -0
- src/linters/stringly_typed/typescript/analyzer.py +157 -0
- src/linters/stringly_typed/typescript/call_tracker.py +329 -0
- src/linters/stringly_typed/typescript/comparison_tracker.py +372 -0
- src/linters/stringly_typed/violation_generator.py +376 -0
- src/orchestrator/core.py +241 -12
- {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/METADATA +9 -3
- {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/RECORD +74 -28
- thailint-0.12.0.dist-info/entry_points.txt +4 -0
- src/cli.py +0 -2141
- thailint-0.10.0.dist-info/entry_points.txt +0 -4
- {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/WHEEL +0 -0
- {thailint-0.10.0.dist-info → thailint-0.12.0.dist-info}/licenses/LICENSE +0 -0
src/linter_config/ignore.py
CHANGED
|
@@ -56,6 +56,7 @@ class IgnoreDirectiveParser:
|
|
|
56
56
|
"""
|
|
57
57
|
self.project_root = project_root or Path.cwd()
|
|
58
58
|
self.repo_patterns = self._load_repo_ignores()
|
|
59
|
+
self._ignore_cache: dict[str, bool] = {} # Cache for is_ignored results
|
|
59
60
|
|
|
60
61
|
def _load_repo_ignores(self) -> list[str]:
|
|
61
62
|
"""Load global ignore patterns from .thailintignore or .thailint.yaml."""
|
|
@@ -112,26 +113,20 @@ class IgnoreDirectiveParser:
|
|
|
112
113
|
return []
|
|
113
114
|
|
|
114
115
|
def is_ignored(self, file_path: Path) -> bool:
|
|
115
|
-
"""Check if file matches repository-level ignore patterns.
|
|
116
|
+
"""Check if file matches repository-level ignore patterns (cached)."""
|
|
117
|
+
path_str = str(file_path)
|
|
118
|
+
if path_str in self._ignore_cache:
|
|
119
|
+
return self._ignore_cache[path_str]
|
|
116
120
|
|
|
117
|
-
|
|
118
|
-
file_path: Path to check against ignore patterns.
|
|
119
|
-
|
|
120
|
-
Returns:
|
|
121
|
-
True if file should be ignored.
|
|
122
|
-
"""
|
|
123
|
-
# Convert to string relative to project root if possible
|
|
121
|
+
# Convert to relative path for pattern matching
|
|
124
122
|
try:
|
|
125
|
-
|
|
126
|
-
path_str = str(relative_path)
|
|
123
|
+
check_path = str(file_path.relative_to(self.project_root))
|
|
127
124
|
except ValueError:
|
|
128
|
-
|
|
129
|
-
path_str = str(file_path)
|
|
125
|
+
check_path = path_str
|
|
130
126
|
|
|
131
|
-
for
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
return False
|
|
127
|
+
result = any(self._matches_pattern(check_path, p) for p in self.repo_patterns)
|
|
128
|
+
self._ignore_cache[path_str] = result
|
|
129
|
+
return result
|
|
135
130
|
|
|
136
131
|
def _matches_pattern(self, path: str, pattern: str) -> bool:
|
|
137
132
|
"""Check if path matches gitignore-style pattern.
|
|
@@ -473,3 +468,24 @@ class IgnoreDirectiveParser:
|
|
|
473
468
|
|
|
474
469
|
# Alias for backwards compatibility
|
|
475
470
|
IgnoreParser = IgnoreDirectiveParser
|
|
471
|
+
|
|
472
|
+
# Singleton pattern for performance: YAML parsing repeated 9x consumed 44% overhead
|
|
473
|
+
_CACHED_PARSER: IgnoreDirectiveParser | None = None
|
|
474
|
+
_CACHED_PROJECT_ROOT: Path | None = None
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def get_ignore_parser(project_root: Path | None = None) -> IgnoreDirectiveParser:
|
|
478
|
+
"""Get cached ignore parser instance (singleton pattern for performance)."""
|
|
479
|
+
global _CACHED_PARSER, _CACHED_PROJECT_ROOT # pylint: disable=global-statement
|
|
480
|
+
effective_root = project_root or Path.cwd()
|
|
481
|
+
if _CACHED_PARSER is None or _CACHED_PROJECT_ROOT != effective_root:
|
|
482
|
+
_CACHED_PARSER = IgnoreDirectiveParser(effective_root)
|
|
483
|
+
_CACHED_PROJECT_ROOT = effective_root
|
|
484
|
+
return _CACHED_PARSER
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def clear_ignore_parser_cache() -> None:
|
|
488
|
+
"""Clear cached parser for test isolation or project root changes."""
|
|
489
|
+
global _CACHED_PARSER, _CACHED_PROJECT_ROOT # pylint: disable=global-statement
|
|
490
|
+
_CACHED_PARSER = None
|
|
491
|
+
_CACHED_PROJECT_ROOT = None
|
src/linters/collection_pipeline/linter.py
CHANGED
|
@@ -27,7 +27,7 @@ from pathlib import Path
|
|
|
27
27
|
|
|
28
28
|
from src.core.base import BaseLintContext, BaseLintRule
|
|
29
29
|
from src.core.types import Severity, Violation
|
|
30
|
-
from src.linter_config.ignore import
|
|
30
|
+
from src.linter_config.ignore import get_ignore_parser
|
|
31
31
|
|
|
32
32
|
from .config import CollectionPipelineConfig
|
|
33
33
|
from .detector import PatternMatch, PipelinePatternDetector
|
|
@@ -38,7 +38,7 @@ class CollectionPipelineRule(BaseLintRule): # thailint: ignore[srp,dry]
|
|
|
38
38
|
|
|
39
39
|
def __init__(self) -> None:
|
|
40
40
|
"""Initialize the rule with ignore parser."""
|
|
41
|
-
self._ignore_parser =
|
|
41
|
+
self._ignore_parser = get_ignore_parser()
|
|
42
42
|
|
|
43
43
|
@property
|
|
44
44
|
def rule_id(self) -> str:
|
src/linters/dry/block_filter.py
CHANGED
|
@@ -10,7 +10,8 @@ Overview: Provides an extensible architecture for filtering duplicate code block
|
|
|
10
10
|
|
|
11
11
|
Dependencies: ast, re, typing
|
|
12
12
|
|
|
13
|
-
Exports: BaseBlockFilter, BlockFilterRegistry, KeywordArgumentFilter, ImportGroupFilter
|
|
13
|
+
Exports: BaseBlockFilter, BlockFilterRegistry, KeywordArgumentFilter, ImportGroupFilter,
|
|
14
|
+
LoggerCallFilter, ExceptionReraiseFilter
|
|
14
15
|
|
|
15
16
|
Interfaces: BaseBlockFilter.should_filter(code_block, file_content) -> bool
|
|
16
17
|
|
|
@@ -196,6 +197,99 @@ class ImportGroupFilter(BaseBlockFilter):
|
|
|
196
197
|
return "import_group_filter"
|
|
197
198
|
|
|
198
199
|
|
|
200
|
+
class LoggerCallFilter(BaseBlockFilter):
|
|
201
|
+
"""Filters single-line logger calls that are idiomatic but appear similar.
|
|
202
|
+
|
|
203
|
+
Detects patterns like:
|
|
204
|
+
logger.debug(f"Command: {cmd}")
|
|
205
|
+
logger.info("Starting process...")
|
|
206
|
+
logging.warning("...")
|
|
207
|
+
|
|
208
|
+
These are contextually different despite structural similarity.
|
|
209
|
+
"""
|
|
210
|
+
|
|
211
|
+
def __init__(self) -> None:
|
|
212
|
+
"""Initialize the logger call filter."""
|
|
213
|
+
# Pattern matches: logger.level(...) or logging.level(...)
|
|
214
|
+
self._logger_pattern = re.compile(
|
|
215
|
+
r"^\s*(self\.)?(logger|logging|log)\."
|
|
216
|
+
r"(debug|info|warning|error|critical|exception|log)\s*\("
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
220
|
+
"""Check if block is primarily single-line logger calls.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
block: Code block to evaluate
|
|
224
|
+
file_content: Full file content
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
True if block should be filtered
|
|
228
|
+
"""
|
|
229
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
230
|
+
non_empty = [s for line in lines if (s := line.strip())]
|
|
231
|
+
|
|
232
|
+
if not non_empty:
|
|
233
|
+
return False
|
|
234
|
+
|
|
235
|
+
# Filter if it's a single line that's a logger call
|
|
236
|
+
if len(non_empty) == 1:
|
|
237
|
+
return bool(self._logger_pattern.match(non_empty[0]))
|
|
238
|
+
|
|
239
|
+
return False
|
|
240
|
+
|
|
241
|
+
@property
|
|
242
|
+
def name(self) -> str:
|
|
243
|
+
"""Filter name."""
|
|
244
|
+
return "logger_call_filter"
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class ExceptionReraiseFilter(BaseBlockFilter):
|
|
248
|
+
"""Filters idiomatic exception re-raising patterns.
|
|
249
|
+
|
|
250
|
+
Detects patterns like:
|
|
251
|
+
except SomeError as e:
|
|
252
|
+
raise NewError(...) from e
|
|
253
|
+
|
|
254
|
+
These are Python best practices for exception chaining.
|
|
255
|
+
"""
|
|
256
|
+
|
|
257
|
+
def __init__(self) -> None:
|
|
258
|
+
"""Initialize the exception reraise filter."""
|
|
259
|
+
pass # Stateless filter
|
|
260
|
+
|
|
261
|
+
def should_filter(self, block: CodeBlock, file_content: str) -> bool:
|
|
262
|
+
"""Check if block is an exception re-raise pattern.
|
|
263
|
+
|
|
264
|
+
Args:
|
|
265
|
+
block: Code block to evaluate
|
|
266
|
+
file_content: Full file content
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
True if block should be filtered
|
|
270
|
+
"""
|
|
271
|
+
lines = file_content.split("\n")[block.start_line - 1 : block.end_line]
|
|
272
|
+
stripped_lines = [s for line in lines if (s := line.strip())]
|
|
273
|
+
|
|
274
|
+
if len(stripped_lines) != 2:
|
|
275
|
+
return False
|
|
276
|
+
|
|
277
|
+
return self._is_except_raise_pattern(stripped_lines)
|
|
278
|
+
|
|
279
|
+
@staticmethod
|
|
280
|
+
def _is_except_raise_pattern(lines: list[str]) -> bool:
|
|
281
|
+
"""Check if lines form an except/raise pattern."""
|
|
282
|
+
first, second = lines[0], lines[1]
|
|
283
|
+
is_except = first.startswith("except ") and first.endswith(":")
|
|
284
|
+
is_raise = second.startswith("raise ") and " from " in second
|
|
285
|
+
return is_except and is_raise
|
|
286
|
+
|
|
287
|
+
@property
|
|
288
|
+
def name(self) -> str:
|
|
289
|
+
"""Filter name."""
|
|
290
|
+
return "exception_reraise_filter"
|
|
291
|
+
|
|
292
|
+
|
|
199
293
|
class BlockFilterRegistry:
|
|
200
294
|
"""Registry for managing duplicate block filters."""
|
|
201
295
|
|
|
@@ -262,5 +356,7 @@ def create_default_registry() -> BlockFilterRegistry:
|
|
|
262
356
|
# Register built-in filters
|
|
263
357
|
registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
|
|
264
358
|
registry.register(ImportGroupFilter())
|
|
359
|
+
registry.register(LoggerCallFilter())
|
|
360
|
+
registry.register(ExceptionReraiseFilter())
|
|
265
361
|
|
|
266
362
|
return registry
|
src/linters/dry/cache.py
CHANGED
|
@@ -1,32 +1,40 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Purpose: SQLite storage manager for DRY linter duplicate detection
|
|
3
3
|
|
|
4
|
-
Scope: Code block storage and duplicate detection queries
|
|
4
|
+
Scope: Code block storage, constant storage, and duplicate detection queries
|
|
5
5
|
|
|
6
|
-
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection
|
|
7
|
-
Stores code blocks with hash values
|
|
6
|
+
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection
|
|
7
|
+
and duplicate constants detection. Stores code blocks with hash values and constants with
|
|
8
|
+
name/value pairs, enabling cross-file duplicate detection during a single linter run.
|
|
8
9
|
Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
|
|
9
10
|
No persistence between runs - storage is cleared when linter completes. Includes indexes for
|
|
10
|
-
fast hash lookups enabling cross-file
|
|
11
|
+
fast hash lookups and constant name lookups enabling efficient cross-file detection.
|
|
11
12
|
|
|
12
13
|
Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
|
|
13
14
|
|
|
14
15
|
Exports: CodeBlock dataclass, DRYCache class
|
|
15
16
|
|
|
16
17
|
Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
|
|
17
|
-
find_duplicates_by_hash(hash_value),
|
|
18
|
+
find_duplicates_by_hash(hash_value), duplicate_hashes, add_constants(file_path, constants),
|
|
19
|
+
all_constants, get_duplicate_constant_names(), close()
|
|
18
20
|
|
|
19
|
-
Implementation: SQLite with
|
|
21
|
+
Implementation: SQLite with three tables (files, code_blocks, constants), indexed for performance,
|
|
20
22
|
storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
|
|
21
23
|
"""
|
|
22
24
|
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
23
27
|
import sqlite3
|
|
24
28
|
import tempfile
|
|
25
29
|
from dataclasses import dataclass
|
|
26
30
|
from pathlib import Path
|
|
31
|
+
from typing import TYPE_CHECKING
|
|
27
32
|
|
|
28
33
|
from .cache_query import CacheQueryService
|
|
29
34
|
|
|
35
|
+
if TYPE_CHECKING:
|
|
36
|
+
from .constant import ConstantInfo
|
|
37
|
+
|
|
30
38
|
|
|
31
39
|
@dataclass
|
|
32
40
|
class CodeBlock:
|
|
@@ -93,6 +101,19 @@ class DRYCache:
|
|
|
93
101
|
self.db.execute("CREATE INDEX IF NOT EXISTS idx_hash_value ON code_blocks(hash_value)")
|
|
94
102
|
self.db.execute("CREATE INDEX IF NOT EXISTS idx_file_path ON code_blocks(file_path)")
|
|
95
103
|
|
|
104
|
+
# Constants table for duplicate constant detection
|
|
105
|
+
self.db.execute(
|
|
106
|
+
"""CREATE TABLE IF NOT EXISTS constants (
|
|
107
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
108
|
+
file_path TEXT NOT NULL,
|
|
109
|
+
name TEXT NOT NULL,
|
|
110
|
+
line_number INTEGER NOT NULL,
|
|
111
|
+
value TEXT,
|
|
112
|
+
FOREIGN KEY (file_path) REFERENCES files(file_path) ON DELETE CASCADE
|
|
113
|
+
)"""
|
|
114
|
+
)
|
|
115
|
+
self.db.execute("CREATE INDEX IF NOT EXISTS idx_constant_name ON constants(name)")
|
|
116
|
+
|
|
96
117
|
self.db.commit()
|
|
97
118
|
|
|
98
119
|
def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
@@ -166,6 +187,73 @@ class DRYCache:
|
|
|
166
187
|
"""
|
|
167
188
|
return self._query_service.get_duplicate_hashes(self.db)
|
|
168
189
|
|
|
190
|
+
def add_constants(
|
|
191
|
+
self,
|
|
192
|
+
file_path: Path,
|
|
193
|
+
constants: list[ConstantInfo],
|
|
194
|
+
) -> None:
|
|
195
|
+
"""Add constants to storage.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
file_path: Path to source file
|
|
199
|
+
constants: List of ConstantInfo instances to store
|
|
200
|
+
"""
|
|
201
|
+
if not constants:
|
|
202
|
+
return
|
|
203
|
+
|
|
204
|
+
for const in constants:
|
|
205
|
+
self.db.execute(
|
|
206
|
+
"""INSERT INTO constants
|
|
207
|
+
(file_path, name, line_number, value)
|
|
208
|
+
VALUES (?, ?, ?, ?)""",
|
|
209
|
+
(
|
|
210
|
+
str(file_path),
|
|
211
|
+
const.name,
|
|
212
|
+
const.line_number,
|
|
213
|
+
const.value,
|
|
214
|
+
),
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
self.db.commit()
|
|
218
|
+
|
|
219
|
+
@property
|
|
220
|
+
def all_constants(self) -> list[tuple[str, str, int, str | None]]:
|
|
221
|
+
"""All constants from storage.
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
List of tuples: (file_path, name, line_number, value)
|
|
225
|
+
"""
|
|
226
|
+
cursor = self.db.execute("SELECT file_path, name, line_number, value FROM constants")
|
|
227
|
+
return cursor.fetchall()
|
|
228
|
+
|
|
229
|
+
def get_duplicate_constant_names(self) -> list[str]:
|
|
230
|
+
"""Get constant names that appear in 2+ files.
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
List of constant names appearing in multiple files
|
|
234
|
+
"""
|
|
235
|
+
cursor = self.db.execute(
|
|
236
|
+
"""SELECT name FROM constants
|
|
237
|
+
GROUP BY name
|
|
238
|
+
HAVING COUNT(DISTINCT file_path) >= 2"""
|
|
239
|
+
)
|
|
240
|
+
return [row[0] for row in cursor.fetchall()]
|
|
241
|
+
|
|
242
|
+
def get_constants_by_name(self, name: str) -> list[tuple[str, int, str | None]]:
|
|
243
|
+
"""Get all locations of a constant by name.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
name: The constant name to search for
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
List of tuples: (file_path, line_number, value)
|
|
250
|
+
"""
|
|
251
|
+
cursor = self.db.execute(
|
|
252
|
+
"SELECT file_path, line_number, value FROM constants WHERE name = ?",
|
|
253
|
+
(name,),
|
|
254
|
+
)
|
|
255
|
+
return cursor.fetchall()
|
|
256
|
+
|
|
169
257
|
def close(self) -> None:
|
|
170
258
|
"""Close database connection and cleanup tempfile if used."""
|
|
171
259
|
self.db.close()
|
src/linters/dry/config.py
CHANGED
|
@@ -23,6 +23,7 @@ from typing import Any
|
|
|
23
23
|
# Default configuration constants
|
|
24
24
|
DEFAULT_MIN_DUPLICATE_LINES = 3
|
|
25
25
|
DEFAULT_MIN_DUPLICATE_TOKENS = 30
|
|
26
|
+
DEFAULT_DETECT_DUPLICATE_CONSTANTS = True
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
@dataclass
|
|
@@ -60,23 +61,34 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
60
61
|
}
|
|
61
62
|
)
|
|
62
63
|
|
|
64
|
+
# Duplicate constants detection
|
|
65
|
+
detect_duplicate_constants: bool = DEFAULT_DETECT_DUPLICATE_CONSTANTS
|
|
66
|
+
min_constant_occurrences: int = 2 # Minimum files with same constant to report
|
|
67
|
+
|
|
68
|
+
# Language-specific overrides for constant detection
|
|
69
|
+
python_min_constant_occurrences: int | None = None
|
|
70
|
+
typescript_min_constant_occurrences: int | None = None
|
|
71
|
+
|
|
63
72
|
def __post_init__(self) -> None:
|
|
64
73
|
"""Validate configuration values."""
|
|
65
|
-
|
|
66
|
-
raise ValueError(
|
|
67
|
-
f"min_duplicate_lines must be positive, got {self.min_duplicate_lines}"
|
|
68
|
-
)
|
|
69
|
-
if self.min_duplicate_tokens <= 0:
|
|
70
|
-
raise ValueError(
|
|
71
|
-
f"min_duplicate_tokens must be positive, got {self.min_duplicate_tokens}"
|
|
72
|
-
)
|
|
73
|
-
if self.min_occurrences <= 0:
|
|
74
|
-
raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
|
|
74
|
+
self._validate_positive_fields()
|
|
75
75
|
if self.storage_mode not in ("memory", "tempfile"):
|
|
76
76
|
raise ValueError(
|
|
77
77
|
f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
|
|
78
78
|
)
|
|
79
79
|
|
|
80
|
+
def _validate_positive_fields(self) -> None:
|
|
81
|
+
"""Validate that required fields are positive."""
|
|
82
|
+
positive_fields = [
|
|
83
|
+
("min_duplicate_lines", self.min_duplicate_lines),
|
|
84
|
+
("min_duplicate_tokens", self.min_duplicate_tokens),
|
|
85
|
+
("min_occurrences", self.min_occurrences),
|
|
86
|
+
("min_constant_occurrences", self.min_constant_occurrences),
|
|
87
|
+
]
|
|
88
|
+
for name, value in positive_fields:
|
|
89
|
+
if value <= 0:
|
|
90
|
+
raise ValueError(f"{name} must be positive, got {value}")
|
|
91
|
+
|
|
80
92
|
def get_min_occurrences_for_language(self, language: str) -> int:
|
|
81
93
|
"""Get minimum occurrences threshold for a specific language.
|
|
82
94
|
|
|
@@ -97,6 +109,25 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
97
109
|
override = language_overrides.get(language_lower)
|
|
98
110
|
return override if override is not None else self.min_occurrences
|
|
99
111
|
|
|
112
|
+
def get_min_constant_occurrences_for_language(self, language: str) -> int:
|
|
113
|
+
"""Get minimum constant occurrences threshold for a specific language.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
language: Language identifier (e.g., "python", "typescript")
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
Minimum constant occurrences threshold for the language, or global default
|
|
120
|
+
"""
|
|
121
|
+
language_lower = language.lower()
|
|
122
|
+
|
|
123
|
+
language_overrides = {
|
|
124
|
+
"python": self.python_min_constant_occurrences,
|
|
125
|
+
"typescript": self.typescript_min_constant_occurrences,
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
override = language_overrides.get(language_lower)
|
|
129
|
+
return override if override is not None else self.min_constant_occurrences
|
|
130
|
+
|
|
100
131
|
@classmethod
|
|
101
132
|
def from_dict(cls, config: dict[str, Any]) -> "DRYConfig":
|
|
102
133
|
"""Load configuration from dictionary.
|
|
@@ -131,4 +162,10 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
131
162
|
storage_mode=config.get("storage_mode", "memory"),
|
|
132
163
|
ignore_patterns=config.get("ignore", []),
|
|
133
164
|
filters=filters,
|
|
165
|
+
detect_duplicate_constants=config.get(
|
|
166
|
+
"detect_duplicate_constants", DEFAULT_DETECT_DUPLICATE_CONSTANTS
|
|
167
|
+
),
|
|
168
|
+
min_constant_occurrences=config.get("min_constant_occurrences", 2),
|
|
169
|
+
python_min_constant_occurrences=python_config.get("min_constant_occurrences"),
|
|
170
|
+
typescript_min_constant_occurrences=typescript_config.get("min_constant_occurrences"),
|
|
134
171
|
)
|
src/linters/dry/constant.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Purpose: Dataclasses for duplicate constants detection in DRY linter
|
|
3
|
+
|
|
4
|
+
Scope: Data structures for constant extraction and cross-file detection
|
|
5
|
+
|
|
6
|
+
Overview: Provides dataclasses for representing constants extracted from source code and their
|
|
7
|
+
locations across multiple files. ConstantInfo stores extracted constant metadata (name, line,
|
|
8
|
+
value) from a single file. ConstantLocation represents where a constant appears across the
|
|
9
|
+
project. ConstantGroup represents a group of related constants (exact or fuzzy matches) for
|
|
10
|
+
violation reporting. These structures support the duplicate constants detection feature that
|
|
11
|
+
identifies when the same constant name appears in multiple files.
|
|
12
|
+
|
|
13
|
+
Dependencies: Python dataclasses module, pathlib for Path types
|
|
14
|
+
|
|
15
|
+
Exports: ConstantInfo, ConstantLocation, ConstantGroup dataclasses
|
|
16
|
+
|
|
17
|
+
Interfaces: Dataclass constructors with named fields
|
|
18
|
+
|
|
19
|
+
Implementation: Immutable dataclasses with optional fields for extracted value context
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# Shared pattern for ALL_CAPS constant names (public only, no leading underscore)
|
|
27
|
+
# Used by both Python and TypeScript constant extractors
|
|
28
|
+
# Requires at least 2 characters to exclude single-letter type params (P, T, K, V)
|
|
29
|
+
CONSTANT_NAME_PATTERN = re.compile(r"^[A-Z][A-Z0-9_]+$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
|
|
33
|
+
class ConstantInfo:
|
|
34
|
+
"""Information about a constant extracted from source code.
|
|
35
|
+
|
|
36
|
+
Represents a single constant definition found during file analysis.
|
|
37
|
+
Used during the collection phase before cross-file matching.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
name: str # Constant name (e.g., "API_TIMEOUT")
|
|
41
|
+
line_number: int # Line where constant is defined
|
|
42
|
+
value: str | None = None # Optional: the value (for violation message context)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
|
|
46
|
+
class ConstantLocation:
|
|
47
|
+
"""Location of a constant in the project.
|
|
48
|
+
|
|
49
|
+
Represents where a specific constant appears, including file path,
|
|
50
|
+
line number, and the value assigned. Used for cross-file reporting.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
file_path: Path
|
|
54
|
+
line_number: int
|
|
55
|
+
name: str
|
|
56
|
+
value: str | None = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
|
|
60
|
+
class ConstantGroup:
|
|
61
|
+
"""A group of related constants for violation reporting.
|
|
62
|
+
|
|
63
|
+
Groups constants that match (either exactly or via fuzzy matching)
|
|
64
|
+
across multiple files. Used by the violation builder to generate
|
|
65
|
+
comprehensive violation messages.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
# The canonical name (first seen or most common)
|
|
69
|
+
canonical_name: str
|
|
70
|
+
|
|
71
|
+
# All locations where this constant (or fuzzy match) appears
|
|
72
|
+
locations: list[ConstantLocation] = field(default_factory=list)
|
|
73
|
+
|
|
74
|
+
# All names in this group (for fuzzy matches, may include variants)
|
|
75
|
+
all_names: set[str] = field(default_factory=set)
|
|
76
|
+
|
|
77
|
+
# Whether this is a fuzzy match (True) or exact match (False)
|
|
78
|
+
is_fuzzy_match: bool = False
|
|
79
|
+
|
|
80
|
+
def add_location(self, location: ConstantLocation) -> None:
|
|
81
|
+
"""Add a location to this group.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
location: The constant location to add
|
|
85
|
+
"""
|
|
86
|
+
self.locations.append(location)
|
|
87
|
+
self.all_names.add(location.name)
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def file_count(self) -> int:
|
|
91
|
+
"""Number of unique files containing this constant."""
|
|
92
|
+
return len({loc.file_path for loc in self.locations})
|