thailint 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/cli.py +101 -0
- src/config.py +6 -2
- src/core/base.py +90 -5
- src/linters/dry/block_filter.py +5 -2
- src/linters/dry/cache.py +46 -92
- src/linters/dry/config.py +17 -13
- src/linters/dry/duplicate_storage.py +17 -80
- src/linters/dry/file_analyzer.py +11 -48
- src/linters/dry/linter.py +5 -12
- src/linters/dry/python_analyzer.py +12 -1
- src/linters/dry/storage_initializer.py +9 -18
- src/linters/dry/violation_filter.py +4 -1
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +71 -0
- src/linters/magic_numbers/context_analyzer.py +247 -0
- src/linters/magic_numbers/linter.py +452 -0
- src/linters/magic_numbers/python_analyzer.py +76 -0
- src/linters/magic_numbers/typescript_analyzer.py +217 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +6 -3
- src/linters/nesting/linter.py +8 -19
- src/linters/srp/__init__.py +3 -3
- src/linters/srp/config.py +12 -6
- src/linters/srp/linter.py +33 -24
- {thailint-0.2.1.dist-info → thailint-0.3.1.dist-info}/METADATA +196 -42
- {thailint-0.2.1.dist-info → thailint-0.3.1.dist-info}/RECORD +30 -23
- {thailint-0.2.1.dist-info → thailint-0.3.1.dist-info}/LICENSE +0 -0
- {thailint-0.2.1.dist-info → thailint-0.3.1.dist-info}/WHEEL +0 -0
- {thailint-0.2.1.dist-info → thailint-0.3.1.dist-info}/entry_points.txt +0 -0
src/cli.py
CHANGED
|
@@ -1077,5 +1077,106 @@ def _run_dry_lint(orchestrator, path_objs, recursive):
|
|
|
1077
1077
|
return dry_violations
|
|
1078
1078
|
|
|
1079
1079
|
|
|
1080
|
+
def _setup_magic_numbers_orchestrator(
|
|
1081
|
+
path_objs: list[Path], config_file: str | None, verbose: bool
|
|
1082
|
+
):
|
|
1083
|
+
"""Set up orchestrator for magic-numbers command."""
|
|
1084
|
+
first_path = path_objs[0] if path_objs else Path.cwd()
|
|
1085
|
+
project_root = first_path if first_path.is_dir() else first_path.parent
|
|
1086
|
+
|
|
1087
|
+
from src.orchestrator.core import Orchestrator
|
|
1088
|
+
|
|
1089
|
+
orchestrator = Orchestrator(project_root=project_root)
|
|
1090
|
+
|
|
1091
|
+
if config_file:
|
|
1092
|
+
_load_config_file(orchestrator, config_file, verbose)
|
|
1093
|
+
|
|
1094
|
+
return orchestrator
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
def _run_magic_numbers_lint(orchestrator, path_objs: list[Path], recursive: bool):
|
|
1098
|
+
"""Execute magic-numbers lint on files or directories."""
|
|
1099
|
+
all_violations = _execute_linting_on_paths(orchestrator, path_objs, recursive)
|
|
1100
|
+
return [v for v in all_violations if "magic-number" in v.rule_id]
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
@cli.command("magic-numbers")
|
|
1104
|
+
@click.argument("paths", nargs=-1, type=click.Path())
|
|
1105
|
+
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
|
|
1106
|
+
@format_option
|
|
1107
|
+
@click.option("--recursive/--no-recursive", default=True, help="Scan directories recursively")
|
|
1108
|
+
@click.pass_context
|
|
1109
|
+
def magic_numbers( # pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
1110
|
+
ctx,
|
|
1111
|
+
paths: tuple[str, ...],
|
|
1112
|
+
config_file: str | None,
|
|
1113
|
+
format: str,
|
|
1114
|
+
recursive: bool,
|
|
1115
|
+
):
|
|
1116
|
+
"""Check for magic numbers in code.
|
|
1117
|
+
|
|
1118
|
+
Detects unnamed numeric literals in Python and TypeScript/JavaScript code
|
|
1119
|
+
that should be extracted as named constants for better readability.
|
|
1120
|
+
|
|
1121
|
+
PATHS: Files or directories to lint (defaults to current directory if none provided)
|
|
1122
|
+
|
|
1123
|
+
Examples:
|
|
1124
|
+
|
|
1125
|
+
\b
|
|
1126
|
+
# Check current directory (all files recursively)
|
|
1127
|
+
thai-lint magic-numbers
|
|
1128
|
+
|
|
1129
|
+
\b
|
|
1130
|
+
# Check specific directory
|
|
1131
|
+
thai-lint magic-numbers src/
|
|
1132
|
+
|
|
1133
|
+
\b
|
|
1134
|
+
# Check single file
|
|
1135
|
+
thai-lint magic-numbers src/app.py
|
|
1136
|
+
|
|
1137
|
+
\b
|
|
1138
|
+
# Check multiple files
|
|
1139
|
+
thai-lint magic-numbers src/app.py src/utils.py tests/test_app.py
|
|
1140
|
+
|
|
1141
|
+
\b
|
|
1142
|
+
# Check mix of files and directories
|
|
1143
|
+
thai-lint magic-numbers src/app.py tests/
|
|
1144
|
+
|
|
1145
|
+
\b
|
|
1146
|
+
# Get JSON output
|
|
1147
|
+
thai-lint magic-numbers --format json .
|
|
1148
|
+
|
|
1149
|
+
\b
|
|
1150
|
+
# Use custom config file
|
|
1151
|
+
thai-lint magic-numbers --config .thailint.yaml src/
|
|
1152
|
+
"""
|
|
1153
|
+
verbose = ctx.obj.get("verbose", False)
|
|
1154
|
+
|
|
1155
|
+
if not paths:
|
|
1156
|
+
paths = (".",)
|
|
1157
|
+
|
|
1158
|
+
path_objs = [Path(p) for p in paths]
|
|
1159
|
+
|
|
1160
|
+
try:
|
|
1161
|
+
_execute_magic_numbers_lint(path_objs, config_file, format, recursive, verbose)
|
|
1162
|
+
except Exception as e:
|
|
1163
|
+
_handle_linting_error(e, verbose)
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
def _execute_magic_numbers_lint( # pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
1167
|
+
path_objs, config_file, format, recursive, verbose
|
|
1168
|
+
):
|
|
1169
|
+
"""Execute magic-numbers lint."""
|
|
1170
|
+
_validate_paths_exist(path_objs)
|
|
1171
|
+
orchestrator = _setup_magic_numbers_orchestrator(path_objs, config_file, verbose)
|
|
1172
|
+
magic_numbers_violations = _run_magic_numbers_lint(orchestrator, path_objs, recursive)
|
|
1173
|
+
|
|
1174
|
+
if verbose:
|
|
1175
|
+
logger.info(f"Found {len(magic_numbers_violations)} magic number violation(s)")
|
|
1176
|
+
|
|
1177
|
+
format_violations(magic_numbers_violations, format)
|
|
1178
|
+
sys.exit(1 if magic_numbers_violations else 0)
|
|
1179
|
+
|
|
1180
|
+
|
|
1080
1181
|
if __name__ == "__main__":
|
|
1081
1182
|
cli()
|
src/config.py
CHANGED
|
@@ -34,6 +34,10 @@ class ConfigError(Exception):
|
|
|
34
34
|
"""Configuration-related errors."""
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
# Default configuration constants
|
|
38
|
+
DEFAULT_MAX_RETRIES = 3
|
|
39
|
+
DEFAULT_TIMEOUT_SECONDS = 30
|
|
40
|
+
|
|
37
41
|
# Default configuration values
|
|
38
42
|
DEFAULT_CONFIG: dict[str, Any] = {
|
|
39
43
|
"app_name": "{{PROJECT_NAME}}",
|
|
@@ -41,8 +45,8 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
41
45
|
"log_level": "INFO",
|
|
42
46
|
"output_format": "text",
|
|
43
47
|
"greeting": "Hello",
|
|
44
|
-
"max_retries": 3,
|
|
45
|
-
"timeout": 30,
|
|
48
|
+
"max_retries": DEFAULT_MAX_RETRIES,
|
|
49
|
+
"timeout": DEFAULT_TIMEOUT_SECONDS,
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
# Configuration file search paths (in priority order)
|
src/core/base.py
CHANGED
|
@@ -8,14 +8,17 @@ Overview: Establishes the contract that all linting plugins must follow through
|
|
|
8
8
|
Defines BaseLintRule which all concrete linting rules inherit from, specifying required
|
|
9
9
|
properties (rule_id, rule_name, description) and the check() method for violation detection.
|
|
10
10
|
Provides BaseLintContext as the interface for accessing file information during analysis,
|
|
11
|
-
exposing file_path, file_content, and language properties.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
exposing file_path, file_content, and language properties. Includes MultiLanguageLintRule
|
|
12
|
+
intermediate class implementing template method pattern for language dispatch, eliminating
|
|
13
|
+
code duplication across multi-language linters (nesting, srp, magic_numbers). These
|
|
14
|
+
abstractions enable the rule registry to discover and instantiate rules dynamically without
|
|
15
|
+
tight coupling, supporting the extensible plugin system where new rules can be added by
|
|
16
|
+
simply placing them in the appropriate directory structure.
|
|
15
17
|
|
|
16
18
|
Dependencies: abc for abstract base class support, pathlib for Path types, Violation from types
|
|
17
19
|
|
|
18
|
-
Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface)
|
|
20
|
+
Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface),
|
|
21
|
+
MultiLanguageLintRule (template method base for multi-language linters)
|
|
19
22
|
|
|
20
23
|
Interfaces: BaseLintRule.check(context) -> list[Violation], BaseLintContext properties
|
|
21
24
|
(file_path, file_content, language), all abstract methods must be implemented by subclasses
|
|
@@ -26,6 +29,7 @@ Implementation: ABC-based interface definitions with @abstractmethod decorators,
|
|
|
26
29
|
|
|
27
30
|
from abc import ABC, abstractmethod
|
|
28
31
|
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
29
33
|
|
|
30
34
|
from .types import Violation
|
|
31
35
|
|
|
@@ -132,3 +136,84 @@ class BaseLintRule(ABC):
|
|
|
132
136
|
List of violations found during finalization. Empty list by default.
|
|
133
137
|
"""
|
|
134
138
|
return []
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class MultiLanguageLintRule(BaseLintRule):
|
|
142
|
+
"""Base class for linting rules that support multiple programming languages.
|
|
143
|
+
|
|
144
|
+
Provides language dispatch pattern to eliminate code duplication across multi-language
|
|
145
|
+
linters. Subclasses implement language-specific checking methods rather than handling
|
|
146
|
+
dispatch logic themselves.
|
|
147
|
+
|
|
148
|
+
Subclasses must implement:
|
|
149
|
+
- _check_python(context, config) for Python language support
|
|
150
|
+
- _check_typescript(context, config) for TypeScript/JavaScript support
|
|
151
|
+
- _load_config(context) for configuration loading
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
def check(self, context: BaseLintContext) -> list[Violation]:
|
|
155
|
+
"""Check for violations with automatic language dispatch.
|
|
156
|
+
|
|
157
|
+
Dispatches to language-specific checking methods based on context.language.
|
|
158
|
+
Handles common patterns like file content validation and config loading.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
context: Lint context with file information
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
List of violations found
|
|
165
|
+
"""
|
|
166
|
+
from .linter_utils import has_file_content
|
|
167
|
+
|
|
168
|
+
if not has_file_content(context):
|
|
169
|
+
return []
|
|
170
|
+
|
|
171
|
+
config = self._load_config(context)
|
|
172
|
+
if not config.enabled:
|
|
173
|
+
return []
|
|
174
|
+
|
|
175
|
+
if context.language == "python":
|
|
176
|
+
return self._check_python(context, config)
|
|
177
|
+
|
|
178
|
+
if context.language in ("typescript", "javascript"):
|
|
179
|
+
return self._check_typescript(context, config)
|
|
180
|
+
|
|
181
|
+
return []
|
|
182
|
+
|
|
183
|
+
@abstractmethod
|
|
184
|
+
def _load_config(self, context: BaseLintContext) -> Any:
|
|
185
|
+
"""Load configuration from context.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
context: Lint context
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
Configuration object with at minimum an 'enabled' attribute
|
|
192
|
+
"""
|
|
193
|
+
raise NotImplementedError("Subclasses must implement _load_config")
|
|
194
|
+
|
|
195
|
+
@abstractmethod
|
|
196
|
+
def _check_python(self, context: BaseLintContext, config: Any) -> list[Violation]:
|
|
197
|
+
"""Check Python code for violations.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
context: Lint context with Python file information
|
|
201
|
+
config: Loaded configuration
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
List of violations found in Python code
|
|
205
|
+
"""
|
|
206
|
+
raise NotImplementedError("Subclasses must implement _check_python")
|
|
207
|
+
|
|
208
|
+
@abstractmethod
|
|
209
|
+
def _check_typescript(self, context: BaseLintContext, config: Any) -> list[Violation]:
|
|
210
|
+
"""Check TypeScript/JavaScript code for violations.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
context: Lint context with TypeScript/JavaScript file information
|
|
214
|
+
config: Loaded configuration
|
|
215
|
+
|
|
216
|
+
Returns:
|
|
217
|
+
List of violations found in TypeScript/JavaScript code
|
|
218
|
+
"""
|
|
219
|
+
raise NotImplementedError("Subclasses must implement _check_typescript")
|
src/linters/dry/block_filter.py
CHANGED
|
@@ -23,6 +23,9 @@ from abc import ABC, abstractmethod
|
|
|
23
23
|
from pathlib import Path
|
|
24
24
|
from typing import Protocol
|
|
25
25
|
|
|
26
|
+
# Default filter threshold constants
|
|
27
|
+
DEFAULT_KEYWORD_ARG_THRESHOLD = 0.8
|
|
28
|
+
|
|
26
29
|
|
|
27
30
|
class CodeBlock(Protocol):
|
|
28
31
|
"""Protocol for code blocks (matches cache.CodeBlock)."""
|
|
@@ -67,7 +70,7 @@ class KeywordArgumentFilter(BaseBlockFilter):
|
|
|
67
70
|
These are common in builder patterns and API calls.
|
|
68
71
|
"""
|
|
69
72
|
|
|
70
|
-
def __init__(self, threshold: float = 0.8):
|
|
73
|
+
def __init__(self, threshold: float = DEFAULT_KEYWORD_ARG_THRESHOLD):
|
|
71
74
|
"""Initialize filter.
|
|
72
75
|
|
|
73
76
|
Args:
|
|
@@ -256,7 +259,7 @@ def create_default_registry() -> BlockFilterRegistry:
|
|
|
256
259
|
registry = BlockFilterRegistry()
|
|
257
260
|
|
|
258
261
|
# Register built-in filters
|
|
259
|
-
registry.register(KeywordArgumentFilter(threshold=0.8))
|
|
262
|
+
registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
|
|
260
263
|
registry.register(ImportGroupFilter())
|
|
261
264
|
|
|
262
265
|
return registry
|
src/linters/dry/cache.py
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Purpose: SQLite
|
|
2
|
+
Purpose: SQLite storage manager for DRY linter duplicate detection
|
|
3
3
|
|
|
4
|
-
Scope: Code block storage
|
|
4
|
+
Scope: Code block storage and duplicate detection queries
|
|
5
5
|
|
|
6
|
-
Overview: Implements
|
|
7
|
-
Stores code blocks with hash values, file locations, and metadata
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
cross-file duplicate detection with minimal overhead.
|
|
6
|
+
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection.
|
|
7
|
+
Stores code blocks with hash values, file locations, and metadata during a single linter run.
|
|
8
|
+
Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
|
|
9
|
+
No persistence between runs - storage is cleared when linter completes. Includes indexes for
|
|
10
|
+
fast hash lookups enabling cross-file duplicate detection with minimal overhead.
|
|
11
11
|
|
|
12
|
-
Dependencies: Python sqlite3 module (stdlib), pathlib.Path, dataclasses
|
|
12
|
+
Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
|
|
13
13
|
|
|
14
14
|
Exports: CodeBlock dataclass, DRYCache class
|
|
15
15
|
|
|
16
|
-
Interfaces: DRYCache.__init__,
|
|
17
|
-
|
|
16
|
+
Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
|
|
17
|
+
find_duplicates_by_hash(hash_value), get_duplicate_hashes(), close()
|
|
18
18
|
|
|
19
19
|
Implementation: SQLite with two tables (files, code_blocks), indexed on hash_value for performance,
|
|
20
|
-
|
|
20
|
+
storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
import sqlite3
|
|
24
|
+
import tempfile
|
|
24
25
|
from dataclasses import dataclass
|
|
25
26
|
from pathlib import Path
|
|
26
27
|
|
|
@@ -39,20 +40,32 @@ class CodeBlock:
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
class DRYCache:
|
|
42
|
-
"""SQLite-backed
|
|
43
|
+
"""SQLite-backed storage for duplicate detection."""
|
|
43
44
|
|
|
44
45
|
SCHEMA_VERSION = 1
|
|
45
46
|
|
|
46
|
-
def __init__(self,
|
|
47
|
-
"""Initialize
|
|
47
|
+
def __init__(self, storage_mode: str = "memory") -> None:
|
|
48
|
+
"""Initialize storage with SQLite database.
|
|
48
49
|
|
|
49
50
|
Args:
|
|
50
|
-
|
|
51
|
+
storage_mode: Storage mode - "memory" (default) or "tempfile"
|
|
51
52
|
"""
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
self._storage_mode = storage_mode
|
|
54
|
+
self._tempfile = None
|
|
55
|
+
|
|
56
|
+
# Create SQLite connection based on storage mode
|
|
57
|
+
if storage_mode == "memory":
|
|
58
|
+
self.db = sqlite3.connect(":memory:")
|
|
59
|
+
elif storage_mode == "tempfile":
|
|
60
|
+
# Create temporary file that auto-deletes on close
|
|
61
|
+
# pylint: disable=consider-using-with
|
|
62
|
+
# Justification: tempfile must remain open for SQLite connection lifetime.
|
|
63
|
+
# It is explicitly closed in close() method when cache is finalized.
|
|
64
|
+
self._tempfile = tempfile.NamedTemporaryFile(suffix=".db", delete=True)
|
|
65
|
+
self.db = sqlite3.connect(self._tempfile.name)
|
|
66
|
+
else:
|
|
67
|
+
raise ValueError(f"Invalid storage_mode: {storage_mode}")
|
|
54
68
|
|
|
55
|
-
self.db = sqlite3.connect(str(cache_path))
|
|
56
69
|
self._query_service = CacheQueryService()
|
|
57
70
|
|
|
58
71
|
# Create schema
|
|
@@ -82,68 +95,24 @@ class DRYCache:
|
|
|
82
95
|
|
|
83
96
|
self.db.commit()
|
|
84
97
|
|
|
85
|
-
def
|
|
86
|
-
"""
|
|
98
|
+
def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
99
|
+
"""Add code blocks to storage.
|
|
87
100
|
|
|
88
101
|
Args:
|
|
89
|
-
file_path: Path to file
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
True if cache is fresh, False if stale or missing
|
|
94
|
-
"""
|
|
95
|
-
cursor = self.db.execute("SELECT mtime FROM files WHERE file_path = ?", (str(file_path),))
|
|
96
|
-
row = cursor.fetchone()
|
|
97
|
-
|
|
98
|
-
if not row:
|
|
99
|
-
return False # Not in cache
|
|
100
|
-
|
|
101
|
-
cached_mtime = row[0]
|
|
102
|
-
return cached_mtime == current_mtime
|
|
103
|
-
|
|
104
|
-
def load(self, file_path: Path) -> list[CodeBlock]:
|
|
105
|
-
"""Load cached code blocks for file.
|
|
106
|
-
|
|
107
|
-
Args:
|
|
108
|
-
file_path: Path to file
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
List of CodeBlock instances from cache
|
|
102
|
+
file_path: Path to source file
|
|
103
|
+
blocks: List of CodeBlock instances to store
|
|
112
104
|
"""
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
FROM code_blocks
|
|
116
|
-
WHERE file_path = ?""",
|
|
117
|
-
(str(file_path),),
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
blocks = []
|
|
121
|
-
for hash_val, start, end, snippet in cursor:
|
|
122
|
-
block = CodeBlock(
|
|
123
|
-
file_path=file_path,
|
|
124
|
-
start_line=start,
|
|
125
|
-
end_line=end,
|
|
126
|
-
snippet=snippet,
|
|
127
|
-
hash_value=hash_val,
|
|
128
|
-
)
|
|
129
|
-
blocks.append(block)
|
|
130
|
-
|
|
131
|
-
return blocks
|
|
132
|
-
|
|
133
|
-
def save(self, file_path: Path, mtime: float, blocks: list[CodeBlock]) -> None:
|
|
134
|
-
"""Save code blocks to cache.
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
file_path: Path to file
|
|
138
|
-
mtime: File modification time
|
|
139
|
-
blocks: List of CodeBlock instances to cache
|
|
140
|
-
"""
|
|
141
|
-
# Delete old data for this file
|
|
142
|
-
self.db.execute("DELETE FROM files WHERE file_path = ?", (str(file_path),))
|
|
105
|
+
if not blocks:
|
|
106
|
+
return
|
|
143
107
|
|
|
144
108
|
# Insert file metadata
|
|
109
|
+
try:
|
|
110
|
+
mtime = file_path.stat().st_mtime
|
|
111
|
+
except OSError:
|
|
112
|
+
mtime = 0.0 # File doesn't exist, use placeholder
|
|
113
|
+
|
|
145
114
|
self.db.execute(
|
|
146
|
-
"INSERT INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
|
|
115
|
+
"INSERT OR REPLACE INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
|
|
147
116
|
(str(file_path), mtime, len(blocks)),
|
|
148
117
|
)
|
|
149
118
|
|
|
@@ -164,23 +133,6 @@ class DRYCache:
|
|
|
164
133
|
|
|
165
134
|
self.db.commit()
|
|
166
135
|
|
|
167
|
-
def cleanup_stale(self, max_age_days: int) -> None:
|
|
168
|
-
"""Remove cache entries older than max_age_days.
|
|
169
|
-
|
|
170
|
-
Args:
|
|
171
|
-
max_age_days: Maximum age in days for cache entries
|
|
172
|
-
"""
|
|
173
|
-
# Use parameterized query to prevent SQL injection
|
|
174
|
-
self.db.execute(
|
|
175
|
-
"""DELETE FROM files
|
|
176
|
-
WHERE last_scanned < datetime('now', ? || ' days')""",
|
|
177
|
-
(f"-{max_age_days}",),
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
# Vacuum to reclaim space
|
|
181
|
-
self.db.execute("VACUUM")
|
|
182
|
-
self.db.commit()
|
|
183
|
-
|
|
184
136
|
def find_duplicates_by_hash(self, hash_value: int) -> list[CodeBlock]:
|
|
185
137
|
"""Find all code blocks with the given hash value.
|
|
186
138
|
|
|
@@ -214,5 +166,7 @@ class DRYCache:
|
|
|
214
166
|
return self._query_service.get_duplicate_hashes(self.db)
|
|
215
167
|
|
|
216
168
|
def close(self) -> None:
|
|
217
|
-
"""Close database connection."""
|
|
169
|
+
"""Close database connection and cleanup tempfile if used."""
|
|
218
170
|
self.db.close()
|
|
171
|
+
if self._tempfile:
|
|
172
|
+
self._tempfile.close()
|
src/linters/dry/config.py
CHANGED
|
@@ -20,6 +20,10 @@ Implementation: Dataclass with field defaults, __post_init__ validation, and dic
|
|
|
20
20
|
from dataclasses import dataclass, field
|
|
21
21
|
from typing import Any
|
|
22
22
|
|
|
23
|
+
# Default configuration constants
|
|
24
|
+
DEFAULT_MIN_DUPLICATE_LINES = 3
|
|
25
|
+
DEFAULT_MIN_DUPLICATE_TOKENS = 30
|
|
26
|
+
|
|
23
27
|
|
|
24
28
|
@dataclass
|
|
25
29
|
class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
@@ -27,14 +31,14 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
27
31
|
|
|
28
32
|
Note: Pylint too-many-instance-attributes disabled. This is a configuration
|
|
29
33
|
dataclass serving as a data container for related DRY linter settings.
|
|
30
|
-
All
|
|
31
|
-
overrides,
|
|
34
|
+
All attributes are cohesively related (detection thresholds, language
|
|
35
|
+
overrides, storage mode, filtering). Splitting would reduce cohesion and make
|
|
32
36
|
configuration loading more complex without meaningful benefit.
|
|
33
37
|
"""
|
|
34
38
|
|
|
35
39
|
enabled: bool = False # Must be explicitly enabled
|
|
36
|
-
min_duplicate_lines: int = 3
|
|
37
|
-
min_duplicate_tokens: int = 30
|
|
40
|
+
min_duplicate_lines: int = DEFAULT_MIN_DUPLICATE_LINES
|
|
41
|
+
min_duplicate_tokens: int = DEFAULT_MIN_DUPLICATE_TOKENS
|
|
38
42
|
min_occurrences: int = 2 # Minimum occurrences to report (default: 2)
|
|
39
43
|
|
|
40
44
|
# Language-specific overrides
|
|
@@ -42,10 +46,8 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
42
46
|
typescript_min_occurrences: int | None = None
|
|
43
47
|
javascript_min_occurrences: int | None = None
|
|
44
48
|
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
cache_path: str = ".thailint-cache/dry.db"
|
|
48
|
-
cache_max_age_days: int = 30
|
|
49
|
+
# Storage settings
|
|
50
|
+
storage_mode: str = "memory" # Options: "memory" (default) or "tempfile"
|
|
49
51
|
|
|
50
52
|
# Ignore patterns
|
|
51
53
|
ignore_patterns: list[str] = field(default_factory=lambda: ["tests/", "__init__.py"])
|
|
@@ -70,6 +72,10 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
70
72
|
)
|
|
71
73
|
if self.min_occurrences <= 0:
|
|
72
74
|
raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
|
|
75
|
+
if self.storage_mode not in ("memory", "tempfile"):
|
|
76
|
+
raise ValueError(
|
|
77
|
+
f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
|
|
78
|
+
)
|
|
73
79
|
|
|
74
80
|
def get_min_occurrences_for_language(self, language: str) -> int:
|
|
75
81
|
"""Get minimum occurrences threshold for a specific language.
|
|
@@ -116,15 +122,13 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
116
122
|
|
|
117
123
|
return cls(
|
|
118
124
|
enabled=config.get("enabled", False),
|
|
119
|
-
min_duplicate_lines=config.get("min_duplicate_lines", 3),
|
|
120
|
-
min_duplicate_tokens=config.get("min_duplicate_tokens", 30),
|
|
125
|
+
min_duplicate_lines=config.get("min_duplicate_lines", DEFAULT_MIN_DUPLICATE_LINES),
|
|
126
|
+
min_duplicate_tokens=config.get("min_duplicate_tokens", DEFAULT_MIN_DUPLICATE_TOKENS),
|
|
121
127
|
min_occurrences=config.get("min_occurrences", 2),
|
|
122
128
|
python_min_occurrences=python_config.get("min_occurrences"),
|
|
123
129
|
typescript_min_occurrences=typescript_config.get("min_occurrences"),
|
|
124
130
|
javascript_min_occurrences=javascript_config.get("min_occurrences"),
|
|
125
|
-
|
|
126
|
-
cache_path=config.get("cache_path", ".thailint-cache/dry.db"),
|
|
127
|
-
cache_max_age_days=config.get("cache_max_age_days", 30),
|
|
131
|
+
storage_mode=config.get("storage_mode", "memory"),
|
|
128
132
|
ignore_patterns=config.get("ignore", []),
|
|
129
133
|
filters=filters,
|
|
130
134
|
)
|