thailint 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/cli.py +30 -4
- src/config.py +6 -2
- src/core/base.py +90 -5
- src/linters/dry/block_filter.py +5 -2
- src/linters/dry/cache.py +46 -92
- src/linters/dry/config.py +17 -13
- src/linters/dry/duplicate_storage.py +17 -80
- src/linters/dry/file_analyzer.py +11 -48
- src/linters/dry/linter.py +5 -12
- src/linters/dry/python_analyzer.py +12 -1
- src/linters/dry/storage_initializer.py +9 -18
- src/linters/dry/violation_filter.py +4 -1
- src/linters/magic_numbers/__init__.py +48 -0
- src/linters/magic_numbers/config.py +71 -0
- src/linters/magic_numbers/context_analyzer.py +247 -0
- src/linters/magic_numbers/linter.py +452 -0
- src/linters/magic_numbers/python_analyzer.py +76 -0
- src/linters/magic_numbers/typescript_analyzer.py +217 -0
- src/linters/magic_numbers/violation_builder.py +98 -0
- src/linters/nesting/__init__.py +6 -2
- src/linters/nesting/config.py +6 -3
- src/linters/nesting/linter.py +8 -19
- src/linters/srp/__init__.py +3 -3
- src/linters/srp/config.py +12 -6
- src/linters/srp/linter.py +33 -24
- {thailint-0.2.0.dist-info → thailint-0.3.0.dist-info}/METADATA +196 -42
- {thailint-0.2.0.dist-info → thailint-0.3.0.dist-info}/RECORD +30 -23
- {thailint-0.2.0.dist-info → thailint-0.3.0.dist-info}/LICENSE +0 -0
- {thailint-0.2.0.dist-info → thailint-0.3.0.dist-info}/WHEEL +0 -0
- {thailint-0.2.0.dist-info → thailint-0.3.0.dist-info}/entry_points.txt +0 -0
src/cli.py
CHANGED
|
@@ -363,7 +363,7 @@ def config_reset(ctx, yes: bool):
|
|
|
363
363
|
|
|
364
364
|
|
|
365
365
|
@cli.command("file-placement")
|
|
366
|
-
@click.argument("paths", nargs=-1, type=click.Path(
|
|
366
|
+
@click.argument("paths", nargs=-1, type=click.Path())
|
|
367
367
|
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
|
|
368
368
|
@click.option("--rules", "-r", help="Inline JSON rules configuration")
|
|
369
369
|
@format_option
|
|
@@ -432,6 +432,7 @@ def _execute_file_placement_lint( # pylint: disable=too-many-arguments,too-many
|
|
|
432
432
|
path_objs, config_file, rules, format, recursive, verbose
|
|
433
433
|
):
|
|
434
434
|
"""Execute file placement linting."""
|
|
435
|
+
_validate_paths_exist(path_objs)
|
|
435
436
|
orchestrator = _setup_orchestrator(path_objs, config_file, rules, verbose)
|
|
436
437
|
all_violations = _execute_linting_on_paths(orchestrator, path_objs, recursive)
|
|
437
438
|
|
|
@@ -453,6 +454,28 @@ def _handle_linting_error(error: Exception, verbose: bool) -> None:
|
|
|
453
454
|
sys.exit(2)
|
|
454
455
|
|
|
455
456
|
|
|
457
|
+
def _validate_paths_exist(path_objs: list[Path]) -> None:
|
|
458
|
+
"""Validate that all provided paths exist.
|
|
459
|
+
|
|
460
|
+
Args:
|
|
461
|
+
path_objs: List of Path objects to validate
|
|
462
|
+
|
|
463
|
+
Raises:
|
|
464
|
+
SystemExit: If any path doesn't exist (exit code 2)
|
|
465
|
+
"""
|
|
466
|
+
for path in path_objs:
|
|
467
|
+
if not path.exists():
|
|
468
|
+
click.echo(f"Error: Path does not exist: {path}", err=True)
|
|
469
|
+
click.echo("", err=True)
|
|
470
|
+
click.echo(
|
|
471
|
+
"Hint: When using Docker, ensure paths are inside the mounted volume:", err=True
|
|
472
|
+
)
|
|
473
|
+
click.echo(
|
|
474
|
+
" docker run -v $(pwd):/data thailint <command> /data/your-file.py", err=True
|
|
475
|
+
)
|
|
476
|
+
sys.exit(2)
|
|
477
|
+
|
|
478
|
+
|
|
456
479
|
def _find_project_root(start_path: Path) -> Path:
|
|
457
480
|
"""Find project root by looking for .git or pyproject.toml.
|
|
458
481
|
|
|
@@ -637,7 +660,7 @@ def _run_nesting_lint(orchestrator, path_objs: list[Path], recursive: bool):
|
|
|
637
660
|
|
|
638
661
|
|
|
639
662
|
@cli.command("nesting")
|
|
640
|
-
@click.argument("paths", nargs=-1, type=click.Path(
|
|
663
|
+
@click.argument("paths", nargs=-1, type=click.Path())
|
|
641
664
|
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
|
|
642
665
|
@format_option
|
|
643
666
|
@click.option("--max-depth", type=int, help="Override max nesting depth (default: 4)")
|
|
@@ -710,6 +733,7 @@ def _execute_nesting_lint( # pylint: disable=too-many-arguments,too-many-positi
|
|
|
710
733
|
path_objs, config_file, format, max_depth, recursive, verbose
|
|
711
734
|
):
|
|
712
735
|
"""Execute nesting lint."""
|
|
736
|
+
_validate_paths_exist(path_objs)
|
|
713
737
|
orchestrator = _setup_nesting_orchestrator(path_objs, config_file, verbose)
|
|
714
738
|
_apply_nesting_config_override(orchestrator, max_depth, verbose)
|
|
715
739
|
nesting_violations = _run_nesting_lint(orchestrator, path_objs, recursive)
|
|
@@ -773,7 +797,7 @@ def _run_srp_lint(orchestrator, path_objs: list[Path], recursive: bool):
|
|
|
773
797
|
|
|
774
798
|
|
|
775
799
|
@cli.command("srp")
|
|
776
|
-
@click.argument("paths", nargs=-1, type=click.Path(
|
|
800
|
+
@click.argument("paths", nargs=-1, type=click.Path())
|
|
777
801
|
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
|
|
778
802
|
@format_option
|
|
779
803
|
@click.option("--max-methods", type=int, help="Override max methods per class (default: 7)")
|
|
@@ -845,6 +869,7 @@ def _execute_srp_lint( # pylint: disable=too-many-arguments,too-many-positional
|
|
|
845
869
|
path_objs, config_file, format, max_methods, max_loc, recursive, verbose
|
|
846
870
|
):
|
|
847
871
|
"""Execute SRP lint."""
|
|
872
|
+
_validate_paths_exist(path_objs)
|
|
848
873
|
orchestrator = _setup_srp_orchestrator(path_objs, config_file, verbose)
|
|
849
874
|
_apply_srp_config_override(orchestrator, max_methods, max_loc, verbose)
|
|
850
875
|
srp_violations = _run_srp_lint(orchestrator, path_objs, recursive)
|
|
@@ -857,7 +882,7 @@ def _execute_srp_lint( # pylint: disable=too-many-arguments,too-many-positional
|
|
|
857
882
|
|
|
858
883
|
|
|
859
884
|
@cli.command("dry")
|
|
860
|
-
@click.argument("paths", nargs=-1, type=click.Path(
|
|
885
|
+
@click.argument("paths", nargs=-1, type=click.Path())
|
|
861
886
|
@click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
|
|
862
887
|
@format_option
|
|
863
888
|
@click.option("--min-lines", type=int, help="Override min duplicate lines threshold")
|
|
@@ -943,6 +968,7 @@ def _execute_dry_lint( # pylint: disable=too-many-arguments,too-many-positional
|
|
|
943
968
|
path_objs, config_file, format, min_lines, no_cache, clear_cache, recursive, verbose
|
|
944
969
|
):
|
|
945
970
|
"""Execute DRY linting."""
|
|
971
|
+
_validate_paths_exist(path_objs)
|
|
946
972
|
orchestrator = _setup_dry_orchestrator(path_objs, config_file, verbose)
|
|
947
973
|
_apply_dry_config_override(orchestrator, min_lines, no_cache, verbose)
|
|
948
974
|
|
src/config.py
CHANGED
|
@@ -34,6 +34,10 @@ class ConfigError(Exception):
|
|
|
34
34
|
"""Configuration-related errors."""
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
# Default configuration constants
|
|
38
|
+
DEFAULT_MAX_RETRIES = 3
|
|
39
|
+
DEFAULT_TIMEOUT_SECONDS = 30
|
|
40
|
+
|
|
37
41
|
# Default configuration values
|
|
38
42
|
DEFAULT_CONFIG: dict[str, Any] = {
|
|
39
43
|
"app_name": "{{PROJECT_NAME}}",
|
|
@@ -41,8 +45,8 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
41
45
|
"log_level": "INFO",
|
|
42
46
|
"output_format": "text",
|
|
43
47
|
"greeting": "Hello",
|
|
44
|
-
"max_retries":
|
|
45
|
-
"timeout":
|
|
48
|
+
"max_retries": DEFAULT_MAX_RETRIES,
|
|
49
|
+
"timeout": DEFAULT_TIMEOUT_SECONDS,
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
# Configuration file search paths (in priority order)
|
src/core/base.py
CHANGED
|
@@ -8,14 +8,17 @@ Overview: Establishes the contract that all linting plugins must follow through
|
|
|
8
8
|
Defines BaseLintRule which all concrete linting rules inherit from, specifying required
|
|
9
9
|
properties (rule_id, rule_name, description) and the check() method for violation detection.
|
|
10
10
|
Provides BaseLintContext as the interface for accessing file information during analysis,
|
|
11
|
-
exposing file_path, file_content, and language properties.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
exposing file_path, file_content, and language properties. Includes MultiLanguageLintRule
|
|
12
|
+
intermediate class implementing template method pattern for language dispatch, eliminating
|
|
13
|
+
code duplication across multi-language linters (nesting, srp, magic_numbers). These
|
|
14
|
+
abstractions enable the rule registry to discover and instantiate rules dynamically without
|
|
15
|
+
tight coupling, supporting the extensible plugin system where new rules can be added by
|
|
16
|
+
simply placing them in the appropriate directory structure.
|
|
15
17
|
|
|
16
18
|
Dependencies: abc for abstract base class support, pathlib for Path types, Violation from types
|
|
17
19
|
|
|
18
|
-
Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface)
|
|
20
|
+
Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface),
|
|
21
|
+
MultiLanguageLintRule (template method base for multi-language linters)
|
|
19
22
|
|
|
20
23
|
Interfaces: BaseLintRule.check(context) -> list[Violation], BaseLintContext properties
|
|
21
24
|
(file_path, file_content, language), all abstract methods must be implemented by subclasses
|
|
@@ -26,6 +29,7 @@ Implementation: ABC-based interface definitions with @abstractmethod decorators,
|
|
|
26
29
|
|
|
27
30
|
from abc import ABC, abstractmethod
|
|
28
31
|
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
29
33
|
|
|
30
34
|
from .types import Violation
|
|
31
35
|
|
|
@@ -132,3 +136,84 @@ class BaseLintRule(ABC):
|
|
|
132
136
|
List of violations found during finalization. Empty list by default.
|
|
133
137
|
"""
|
|
134
138
|
return []
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class MultiLanguageLintRule(BaseLintRule):
|
|
142
|
+
"""Base class for linting rules that support multiple programming languages.
|
|
143
|
+
|
|
144
|
+
Provides language dispatch pattern to eliminate code duplication across multi-language
|
|
145
|
+
linters. Subclasses implement language-specific checking methods rather than handling
|
|
146
|
+
dispatch logic themselves.
|
|
147
|
+
|
|
148
|
+
Subclasses must implement:
|
|
149
|
+
- _check_python(context, config) for Python language support
|
|
150
|
+
- _check_typescript(context, config) for TypeScript/JavaScript support
|
|
151
|
+
- _load_config(context) for configuration loading
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
def check(self, context: BaseLintContext) -> list[Violation]:
|
|
155
|
+
"""Check for violations with automatic language dispatch.
|
|
156
|
+
|
|
157
|
+
Dispatches to language-specific checking methods based on context.language.
|
|
158
|
+
Handles common patterns like file content validation and config loading.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
context: Lint context with file information
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
List of violations found
|
|
165
|
+
"""
|
|
166
|
+
from .linter_utils import has_file_content
|
|
167
|
+
|
|
168
|
+
if not has_file_content(context):
|
|
169
|
+
return []
|
|
170
|
+
|
|
171
|
+
config = self._load_config(context)
|
|
172
|
+
if not config.enabled:
|
|
173
|
+
return []
|
|
174
|
+
|
|
175
|
+
if context.language == "python":
|
|
176
|
+
return self._check_python(context, config)
|
|
177
|
+
|
|
178
|
+
if context.language in ("typescript", "javascript"):
|
|
179
|
+
return self._check_typescript(context, config)
|
|
180
|
+
|
|
181
|
+
return []
|
|
182
|
+
|
|
183
|
+
@abstractmethod
|
|
184
|
+
def _load_config(self, context: BaseLintContext) -> Any:
|
|
185
|
+
"""Load configuration from context.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
context: Lint context
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
Configuration object with at minimum an 'enabled' attribute
|
|
192
|
+
"""
|
|
193
|
+
raise NotImplementedError("Subclasses must implement _load_config")
|
|
194
|
+
|
|
195
|
+
@abstractmethod
|
|
196
|
+
def _check_python(self, context: BaseLintContext, config: Any) -> list[Violation]:
|
|
197
|
+
"""Check Python code for violations.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
context: Lint context with Python file information
|
|
201
|
+
config: Loaded configuration
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
List of violations found in Python code
|
|
205
|
+
"""
|
|
206
|
+
raise NotImplementedError("Subclasses must implement _check_python")
|
|
207
|
+
|
|
208
|
+
@abstractmethod
|
|
209
|
+
def _check_typescript(self, context: BaseLintContext, config: Any) -> list[Violation]:
|
|
210
|
+
"""Check TypeScript/JavaScript code for violations.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
context: Lint context with TypeScript/JavaScript file information
|
|
214
|
+
config: Loaded configuration
|
|
215
|
+
|
|
216
|
+
Returns:
|
|
217
|
+
List of violations found in TypeScript/JavaScript code
|
|
218
|
+
"""
|
|
219
|
+
raise NotImplementedError("Subclasses must implement _check_typescript")
|
src/linters/dry/block_filter.py
CHANGED
|
@@ -23,6 +23,9 @@ from abc import ABC, abstractmethod
|
|
|
23
23
|
from pathlib import Path
|
|
24
24
|
from typing import Protocol
|
|
25
25
|
|
|
26
|
+
# Default filter threshold constants
|
|
27
|
+
DEFAULT_KEYWORD_ARG_THRESHOLD = 0.8
|
|
28
|
+
|
|
26
29
|
|
|
27
30
|
class CodeBlock(Protocol):
|
|
28
31
|
"""Protocol for code blocks (matches cache.CodeBlock)."""
|
|
@@ -67,7 +70,7 @@ class KeywordArgumentFilter(BaseBlockFilter):
|
|
|
67
70
|
These are common in builder patterns and API calls.
|
|
68
71
|
"""
|
|
69
72
|
|
|
70
|
-
def __init__(self, threshold: float =
|
|
73
|
+
def __init__(self, threshold: float = DEFAULT_KEYWORD_ARG_THRESHOLD):
|
|
71
74
|
"""Initialize filter.
|
|
72
75
|
|
|
73
76
|
Args:
|
|
@@ -256,7 +259,7 @@ def create_default_registry() -> BlockFilterRegistry:
|
|
|
256
259
|
registry = BlockFilterRegistry()
|
|
257
260
|
|
|
258
261
|
# Register built-in filters
|
|
259
|
-
registry.register(KeywordArgumentFilter(threshold=
|
|
262
|
+
registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
|
|
260
263
|
registry.register(ImportGroupFilter())
|
|
261
264
|
|
|
262
265
|
return registry
|
src/linters/dry/cache.py
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Purpose: SQLite
|
|
2
|
+
Purpose: SQLite storage manager for DRY linter duplicate detection
|
|
3
3
|
|
|
4
|
-
Scope: Code block storage
|
|
4
|
+
Scope: Code block storage and duplicate detection queries
|
|
5
5
|
|
|
6
|
-
Overview: Implements
|
|
7
|
-
Stores code blocks with hash values, file locations, and metadata
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
cross-file duplicate detection with minimal overhead.
|
|
6
|
+
Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection.
|
|
7
|
+
Stores code blocks with hash values, file locations, and metadata during a single linter run.
|
|
8
|
+
Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
|
|
9
|
+
No persistence between runs - storage is cleared when linter completes. Includes indexes for
|
|
10
|
+
fast hash lookups enabling cross-file duplicate detection with minimal overhead.
|
|
11
11
|
|
|
12
|
-
Dependencies: Python sqlite3 module (stdlib), pathlib.Path, dataclasses
|
|
12
|
+
Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
|
|
13
13
|
|
|
14
14
|
Exports: CodeBlock dataclass, DRYCache class
|
|
15
15
|
|
|
16
|
-
Interfaces: DRYCache.__init__,
|
|
17
|
-
|
|
16
|
+
Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
|
|
17
|
+
find_duplicates_by_hash(hash_value), get_duplicate_hashes(), close()
|
|
18
18
|
|
|
19
19
|
Implementation: SQLite with two tables (files, code_blocks), indexed on hash_value for performance,
|
|
20
|
-
|
|
20
|
+
storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
import sqlite3
|
|
24
|
+
import tempfile
|
|
24
25
|
from dataclasses import dataclass
|
|
25
26
|
from pathlib import Path
|
|
26
27
|
|
|
@@ -39,20 +40,32 @@ class CodeBlock:
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
class DRYCache:
|
|
42
|
-
"""SQLite-backed
|
|
43
|
+
"""SQLite-backed storage for duplicate detection."""
|
|
43
44
|
|
|
44
45
|
SCHEMA_VERSION = 1
|
|
45
46
|
|
|
46
|
-
def __init__(self,
|
|
47
|
-
"""Initialize
|
|
47
|
+
def __init__(self, storage_mode: str = "memory") -> None:
|
|
48
|
+
"""Initialize storage with SQLite database.
|
|
48
49
|
|
|
49
50
|
Args:
|
|
50
|
-
|
|
51
|
+
storage_mode: Storage mode - "memory" (default) or "tempfile"
|
|
51
52
|
"""
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
self._storage_mode = storage_mode
|
|
54
|
+
self._tempfile = None
|
|
55
|
+
|
|
56
|
+
# Create SQLite connection based on storage mode
|
|
57
|
+
if storage_mode == "memory":
|
|
58
|
+
self.db = sqlite3.connect(":memory:")
|
|
59
|
+
elif storage_mode == "tempfile":
|
|
60
|
+
# Create temporary file that auto-deletes on close
|
|
61
|
+
# pylint: disable=consider-using-with
|
|
62
|
+
# Justification: tempfile must remain open for SQLite connection lifetime.
|
|
63
|
+
# It is explicitly closed in close() method when cache is finalized.
|
|
64
|
+
self._tempfile = tempfile.NamedTemporaryFile(suffix=".db", delete=True)
|
|
65
|
+
self.db = sqlite3.connect(self._tempfile.name)
|
|
66
|
+
else:
|
|
67
|
+
raise ValueError(f"Invalid storage_mode: {storage_mode}")
|
|
54
68
|
|
|
55
|
-
self.db = sqlite3.connect(str(cache_path))
|
|
56
69
|
self._query_service = CacheQueryService()
|
|
57
70
|
|
|
58
71
|
# Create schema
|
|
@@ -82,68 +95,24 @@ class DRYCache:
|
|
|
82
95
|
|
|
83
96
|
self.db.commit()
|
|
84
97
|
|
|
85
|
-
def
|
|
86
|
-
"""
|
|
98
|
+
def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
99
|
+
"""Add code blocks to storage.
|
|
87
100
|
|
|
88
101
|
Args:
|
|
89
|
-
file_path: Path to file
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
True if cache is fresh, False if stale or missing
|
|
94
|
-
"""
|
|
95
|
-
cursor = self.db.execute("SELECT mtime FROM files WHERE file_path = ?", (str(file_path),))
|
|
96
|
-
row = cursor.fetchone()
|
|
97
|
-
|
|
98
|
-
if not row:
|
|
99
|
-
return False # Not in cache
|
|
100
|
-
|
|
101
|
-
cached_mtime = row[0]
|
|
102
|
-
return cached_mtime == current_mtime
|
|
103
|
-
|
|
104
|
-
def load(self, file_path: Path) -> list[CodeBlock]:
|
|
105
|
-
"""Load cached code blocks for file.
|
|
106
|
-
|
|
107
|
-
Args:
|
|
108
|
-
file_path: Path to file
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
List of CodeBlock instances from cache
|
|
102
|
+
file_path: Path to source file
|
|
103
|
+
blocks: List of CodeBlock instances to store
|
|
112
104
|
"""
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
FROM code_blocks
|
|
116
|
-
WHERE file_path = ?""",
|
|
117
|
-
(str(file_path),),
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
blocks = []
|
|
121
|
-
for hash_val, start, end, snippet in cursor:
|
|
122
|
-
block = CodeBlock(
|
|
123
|
-
file_path=file_path,
|
|
124
|
-
start_line=start,
|
|
125
|
-
end_line=end,
|
|
126
|
-
snippet=snippet,
|
|
127
|
-
hash_value=hash_val,
|
|
128
|
-
)
|
|
129
|
-
blocks.append(block)
|
|
130
|
-
|
|
131
|
-
return blocks
|
|
132
|
-
|
|
133
|
-
def save(self, file_path: Path, mtime: float, blocks: list[CodeBlock]) -> None:
|
|
134
|
-
"""Save code blocks to cache.
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
file_path: Path to file
|
|
138
|
-
mtime: File modification time
|
|
139
|
-
blocks: List of CodeBlock instances to cache
|
|
140
|
-
"""
|
|
141
|
-
# Delete old data for this file
|
|
142
|
-
self.db.execute("DELETE FROM files WHERE file_path = ?", (str(file_path),))
|
|
105
|
+
if not blocks:
|
|
106
|
+
return
|
|
143
107
|
|
|
144
108
|
# Insert file metadata
|
|
109
|
+
try:
|
|
110
|
+
mtime = file_path.stat().st_mtime
|
|
111
|
+
except OSError:
|
|
112
|
+
mtime = 0.0 # File doesn't exist, use placeholder
|
|
113
|
+
|
|
145
114
|
self.db.execute(
|
|
146
|
-
"INSERT INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
|
|
115
|
+
"INSERT OR REPLACE INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
|
|
147
116
|
(str(file_path), mtime, len(blocks)),
|
|
148
117
|
)
|
|
149
118
|
|
|
@@ -164,23 +133,6 @@ class DRYCache:
|
|
|
164
133
|
|
|
165
134
|
self.db.commit()
|
|
166
135
|
|
|
167
|
-
def cleanup_stale(self, max_age_days: int) -> None:
|
|
168
|
-
"""Remove cache entries older than max_age_days.
|
|
169
|
-
|
|
170
|
-
Args:
|
|
171
|
-
max_age_days: Maximum age in days for cache entries
|
|
172
|
-
"""
|
|
173
|
-
# Use parameterized query to prevent SQL injection
|
|
174
|
-
self.db.execute(
|
|
175
|
-
"""DELETE FROM files
|
|
176
|
-
WHERE last_scanned < datetime('now', ? || ' days')""",
|
|
177
|
-
(f"-{max_age_days}",),
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
# Vacuum to reclaim space
|
|
181
|
-
self.db.execute("VACUUM")
|
|
182
|
-
self.db.commit()
|
|
183
|
-
|
|
184
136
|
def find_duplicates_by_hash(self, hash_value: int) -> list[CodeBlock]:
|
|
185
137
|
"""Find all code blocks with the given hash value.
|
|
186
138
|
|
|
@@ -214,5 +166,7 @@ class DRYCache:
|
|
|
214
166
|
return self._query_service.get_duplicate_hashes(self.db)
|
|
215
167
|
|
|
216
168
|
def close(self) -> None:
|
|
217
|
-
"""Close database connection."""
|
|
169
|
+
"""Close database connection and cleanup tempfile if used."""
|
|
218
170
|
self.db.close()
|
|
171
|
+
if self._tempfile:
|
|
172
|
+
self._tempfile.close()
|
src/linters/dry/config.py
CHANGED
|
@@ -20,6 +20,10 @@ Implementation: Dataclass with field defaults, __post_init__ validation, and dic
|
|
|
20
20
|
from dataclasses import dataclass, field
|
|
21
21
|
from typing import Any
|
|
22
22
|
|
|
23
|
+
# Default configuration constants
|
|
24
|
+
DEFAULT_MIN_DUPLICATE_LINES = 3
|
|
25
|
+
DEFAULT_MIN_DUPLICATE_TOKENS = 30
|
|
26
|
+
|
|
23
27
|
|
|
24
28
|
@dataclass
|
|
25
29
|
class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
@@ -27,14 +31,14 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
27
31
|
|
|
28
32
|
Note: Pylint too-many-instance-attributes disabled. This is a configuration
|
|
29
33
|
dataclass serving as a data container for related DRY linter settings.
|
|
30
|
-
All
|
|
31
|
-
overrides,
|
|
34
|
+
All attributes are cohesively related (detection thresholds, language
|
|
35
|
+
overrides, storage mode, filtering). Splitting would reduce cohesion and make
|
|
32
36
|
configuration loading more complex without meaningful benefit.
|
|
33
37
|
"""
|
|
34
38
|
|
|
35
39
|
enabled: bool = False # Must be explicitly enabled
|
|
36
|
-
min_duplicate_lines: int =
|
|
37
|
-
min_duplicate_tokens: int =
|
|
40
|
+
min_duplicate_lines: int = DEFAULT_MIN_DUPLICATE_LINES
|
|
41
|
+
min_duplicate_tokens: int = DEFAULT_MIN_DUPLICATE_TOKENS
|
|
38
42
|
min_occurrences: int = 2 # Minimum occurrences to report (default: 2)
|
|
39
43
|
|
|
40
44
|
# Language-specific overrides
|
|
@@ -42,10 +46,8 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
42
46
|
typescript_min_occurrences: int | None = None
|
|
43
47
|
javascript_min_occurrences: int | None = None
|
|
44
48
|
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
cache_path: str = ".thailint-cache/dry.db"
|
|
48
|
-
cache_max_age_days: int = 30
|
|
49
|
+
# Storage settings
|
|
50
|
+
storage_mode: str = "memory" # Options: "memory" (default) or "tempfile"
|
|
49
51
|
|
|
50
52
|
# Ignore patterns
|
|
51
53
|
ignore_patterns: list[str] = field(default_factory=lambda: ["tests/", "__init__.py"])
|
|
@@ -70,6 +72,10 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
70
72
|
)
|
|
71
73
|
if self.min_occurrences <= 0:
|
|
72
74
|
raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
|
|
75
|
+
if self.storage_mode not in ("memory", "tempfile"):
|
|
76
|
+
raise ValueError(
|
|
77
|
+
f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
|
|
78
|
+
)
|
|
73
79
|
|
|
74
80
|
def get_min_occurrences_for_language(self, language: str) -> int:
|
|
75
81
|
"""Get minimum occurrences threshold for a specific language.
|
|
@@ -116,15 +122,13 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
|
|
|
116
122
|
|
|
117
123
|
return cls(
|
|
118
124
|
enabled=config.get("enabled", False),
|
|
119
|
-
min_duplicate_lines=config.get("min_duplicate_lines",
|
|
120
|
-
min_duplicate_tokens=config.get("min_duplicate_tokens",
|
|
125
|
+
min_duplicate_lines=config.get("min_duplicate_lines", DEFAULT_MIN_DUPLICATE_LINES),
|
|
126
|
+
min_duplicate_tokens=config.get("min_duplicate_tokens", DEFAULT_MIN_DUPLICATE_TOKENS),
|
|
121
127
|
min_occurrences=config.get("min_occurrences", 2),
|
|
122
128
|
python_min_occurrences=python_config.get("min_occurrences"),
|
|
123
129
|
typescript_min_occurrences=typescript_config.get("min_occurrences"),
|
|
124
130
|
javascript_min_occurrences=javascript_config.get("min_occurrences"),
|
|
125
|
-
|
|
126
|
-
cache_path=config.get("cache_path", ".thailint-cache/dry.db"),
|
|
127
|
-
cache_max_age_days=config.get("cache_max_age_days", 30),
|
|
131
|
+
storage_mode=config.get("storage_mode", "memory"),
|
|
128
132
|
ignore_patterns=config.get("ignore", []),
|
|
129
133
|
filters=filters,
|
|
130
134
|
)
|
src/linters/dry/duplicate_storage.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Purpose: Storage management for duplicate code blocks
|
|
2
|
+
Purpose: Storage management for duplicate code blocks in SQLite
|
|
3
3
|
|
|
4
|
-
Scope: Manages storage of code blocks in SQLite
|
|
4
|
+
Scope: Manages storage of code blocks in SQLite for duplicate detection
|
|
5
5
|
|
|
6
|
-
Overview: Provides
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
concerns from linting logic to maintain SRP compliance.
|
|
6
|
+
Overview: Provides storage interface for code blocks using SQLite (in-memory or tempfile mode).
|
|
7
|
+
Handles block insertion and duplicate hash queries. Delegates all storage operations to
|
|
8
|
+
DRYCache SQLite layer. Separates storage concerns from linting logic to maintain SRP compliance.
|
|
10
9
|
|
|
11
10
|
Dependencies: DRYCache, CodeBlock, Path
|
|
12
11
|
|
|
@@ -15,7 +14,7 @@ Exports: DuplicateStorage class
|
|
|
15
14
|
Interfaces: DuplicateStorage.add_blocks(file_path, blocks), get_duplicate_hashes(),
|
|
16
15
|
get_blocks_for_hash(hash_value)
|
|
17
16
|
|
|
18
|
-
Implementation: Delegates to
|
|
17
|
+
Implementation: Delegates to SQLite cache for all storage operations
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
20
|
from pathlib import Path
|
|
@@ -24,82 +23,36 @@ from .cache import CodeBlock, DRYCache
|
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
class DuplicateStorage:
|
|
27
|
-
"""Manages storage of code blocks in
|
|
26
|
+
"""Manages storage of code blocks in SQLite."""
|
|
28
27
|
|
|
29
|
-
def __init__(self, cache: DRYCache
|
|
30
|
-
"""Initialize storage with
|
|
28
|
+
def __init__(self, cache: DRYCache) -> None:
|
|
29
|
+
"""Initialize storage with SQLite cache.
|
|
31
30
|
|
|
32
31
|
Args:
|
|
33
|
-
cache: SQLite cache instance (
|
|
32
|
+
cache: SQLite cache instance (in-memory or tempfile mode)
|
|
34
33
|
"""
|
|
35
34
|
self._cache = cache
|
|
36
|
-
self._memory_store: dict[int, list[CodeBlock]] = {}
|
|
37
35
|
|
|
38
36
|
def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
39
|
-
"""Add code blocks to storage
|
|
37
|
+
"""Add code blocks to SQLite storage.
|
|
40
38
|
|
|
41
39
|
Args:
|
|
42
40
|
file_path: Path to source file
|
|
43
41
|
blocks: List of code blocks to store
|
|
44
42
|
"""
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# Also persist to cache if available
|
|
49
|
-
if self._cache:
|
|
50
|
-
self._add_to_cache(file_path, blocks)
|
|
51
|
-
|
|
52
|
-
def add_blocks_to_memory(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
53
|
-
"""Add code blocks to in-memory storage only (for cache hits).
|
|
54
|
-
|
|
55
|
-
Args:
|
|
56
|
-
file_path: Path to source file (used for cache persistence check)
|
|
57
|
-
blocks: List of code blocks to store
|
|
58
|
-
"""
|
|
59
|
-
# Add to memory for duplicate detection this run
|
|
60
|
-
self._add_to_memory(blocks)
|
|
61
|
-
|
|
62
|
-
# Guard clauses - early returns for skip conditions
|
|
63
|
-
if not self._cache:
|
|
64
|
-
return
|
|
65
|
-
|
|
66
|
-
if not blocks:
|
|
67
|
-
return
|
|
68
|
-
|
|
69
|
-
# Update cache with new blocks if needed (for fresh analysis)
|
|
70
|
-
self._update_cache_if_fresh(file_path, blocks)
|
|
71
|
-
|
|
72
|
-
def _update_cache_if_fresh(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
73
|
-
"""Update cache if file analysis is fresh (not from cache).
|
|
74
|
-
|
|
75
|
-
Args:
|
|
76
|
-
file_path: Path to source file
|
|
77
|
-
blocks: List of code blocks to store
|
|
78
|
-
"""
|
|
79
|
-
if not self._cache:
|
|
80
|
-
return
|
|
81
|
-
|
|
82
|
-
try:
|
|
83
|
-
mtime = file_path.stat().st_mtime
|
|
84
|
-
except OSError:
|
|
85
|
-
# File doesn't exist, skip cache
|
|
86
|
-
return
|
|
87
|
-
|
|
88
|
-
# File was analyzed (not cached), so persist if not fresh
|
|
89
|
-
if not self._cache.is_fresh(file_path, mtime):
|
|
90
|
-
self._add_to_cache(file_path, blocks)
|
|
43
|
+
if blocks:
|
|
44
|
+
self._cache.add_blocks(file_path, blocks)
|
|
91
45
|
|
|
92
46
|
def get_duplicate_hashes(self) -> list[int]:
|
|
93
|
-
"""Get all hash values with 2+ occurrences from
|
|
47
|
+
"""Get all hash values with 2+ occurrences from SQLite.
|
|
94
48
|
|
|
95
49
|
Returns:
|
|
96
50
|
List of hash values that appear in multiple blocks
|
|
97
51
|
"""
|
|
98
|
-
|
|
99
|
-
return [h for h, blocks in self._memory_store.items() if len(blocks) >= 2]
|
|
52
|
+
return self._cache.get_duplicate_hashes()
|
|
100
53
|
|
|
101
54
|
def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
|
|
102
|
-
"""Get all blocks with given hash value from
|
|
55
|
+
"""Get all blocks with given hash value from SQLite.
|
|
103
56
|
|
|
104
57
|
Args:
|
|
105
58
|
hash_value: Hash to search for
|
|
@@ -107,20 +60,4 @@ class DuplicateStorage:
|
|
|
107
60
|
Returns:
|
|
108
61
|
List of code blocks with this hash
|
|
109
62
|
"""
|
|
110
|
-
|
|
111
|
-
return self._memory_store.get(hash_value, [])
|
|
112
|
-
|
|
113
|
-
def _add_to_cache(self, file_path: Path, blocks: list[CodeBlock]) -> None:
|
|
114
|
-
"""Add blocks to SQLite cache."""
|
|
115
|
-
if not self._cache or not blocks:
|
|
116
|
-
return
|
|
117
|
-
|
|
118
|
-
mtime = file_path.stat().st_mtime
|
|
119
|
-
self._cache.save(file_path, mtime, blocks)
|
|
120
|
-
|
|
121
|
-
def _add_to_memory(self, blocks: list[CodeBlock]) -> None:
|
|
122
|
-
"""Add blocks to in-memory store."""
|
|
123
|
-
for block in blocks:
|
|
124
|
-
if block.hash_value not in self._memory_store:
|
|
125
|
-
self._memory_store[block.hash_value] = []
|
|
126
|
-
self._memory_store[block.hash_value].append(block)
|
|
63
|
+
return self._cache.find_duplicates_by_hash(hash_value)
|