thailint 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
src/cli.py CHANGED
@@ -363,7 +363,7 @@ def config_reset(ctx, yes: bool):
363
363
 
364
364
 
365
365
  @cli.command("file-placement")
366
- @click.argument("paths", nargs=-1, type=click.Path(exists=True))
366
+ @click.argument("paths", nargs=-1, type=click.Path())
367
367
  @click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
368
368
  @click.option("--rules", "-r", help="Inline JSON rules configuration")
369
369
  @format_option
@@ -432,6 +432,7 @@ def _execute_file_placement_lint( # pylint: disable=too-many-arguments,too-many
432
432
  path_objs, config_file, rules, format, recursive, verbose
433
433
  ):
434
434
  """Execute file placement linting."""
435
+ _validate_paths_exist(path_objs)
435
436
  orchestrator = _setup_orchestrator(path_objs, config_file, rules, verbose)
436
437
  all_violations = _execute_linting_on_paths(orchestrator, path_objs, recursive)
437
438
 
@@ -453,6 +454,28 @@ def _handle_linting_error(error: Exception, verbose: bool) -> None:
453
454
  sys.exit(2)
454
455
 
455
456
 
457
+ def _validate_paths_exist(path_objs: list[Path]) -> None:
458
+ """Validate that all provided paths exist.
459
+
460
+ Args:
461
+ path_objs: List of Path objects to validate
462
+
463
+ Raises:
464
+ SystemExit: If any path doesn't exist (exit code 2)
465
+ """
466
+ for path in path_objs:
467
+ if not path.exists():
468
+ click.echo(f"Error: Path does not exist: {path}", err=True)
469
+ click.echo("", err=True)
470
+ click.echo(
471
+ "Hint: When using Docker, ensure paths are inside the mounted volume:", err=True
472
+ )
473
+ click.echo(
474
+ " docker run -v $(pwd):/data thailint <command> /data/your-file.py", err=True
475
+ )
476
+ sys.exit(2)
477
+
478
+
456
479
  def _find_project_root(start_path: Path) -> Path:
457
480
  """Find project root by looking for .git or pyproject.toml.
458
481
 
@@ -637,7 +660,7 @@ def _run_nesting_lint(orchestrator, path_objs: list[Path], recursive: bool):
637
660
 
638
661
 
639
662
  @cli.command("nesting")
640
- @click.argument("paths", nargs=-1, type=click.Path(exists=True))
663
+ @click.argument("paths", nargs=-1, type=click.Path())
641
664
  @click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
642
665
  @format_option
643
666
  @click.option("--max-depth", type=int, help="Override max nesting depth (default: 4)")
@@ -710,6 +733,7 @@ def _execute_nesting_lint( # pylint: disable=too-many-arguments,too-many-positi
710
733
  path_objs, config_file, format, max_depth, recursive, verbose
711
734
  ):
712
735
  """Execute nesting lint."""
736
+ _validate_paths_exist(path_objs)
713
737
  orchestrator = _setup_nesting_orchestrator(path_objs, config_file, verbose)
714
738
  _apply_nesting_config_override(orchestrator, max_depth, verbose)
715
739
  nesting_violations = _run_nesting_lint(orchestrator, path_objs, recursive)
@@ -773,7 +797,7 @@ def _run_srp_lint(orchestrator, path_objs: list[Path], recursive: bool):
773
797
 
774
798
 
775
799
  @cli.command("srp")
776
- @click.argument("paths", nargs=-1, type=click.Path(exists=True))
800
+ @click.argument("paths", nargs=-1, type=click.Path())
777
801
  @click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
778
802
  @format_option
779
803
  @click.option("--max-methods", type=int, help="Override max methods per class (default: 7)")
@@ -845,6 +869,7 @@ def _execute_srp_lint( # pylint: disable=too-many-arguments,too-many-positional
845
869
  path_objs, config_file, format, max_methods, max_loc, recursive, verbose
846
870
  ):
847
871
  """Execute SRP lint."""
872
+ _validate_paths_exist(path_objs)
848
873
  orchestrator = _setup_srp_orchestrator(path_objs, config_file, verbose)
849
874
  _apply_srp_config_override(orchestrator, max_methods, max_loc, verbose)
850
875
  srp_violations = _run_srp_lint(orchestrator, path_objs, recursive)
@@ -857,7 +882,7 @@ def _execute_srp_lint( # pylint: disable=too-many-arguments,too-many-positional
857
882
 
858
883
 
859
884
  @cli.command("dry")
860
- @click.argument("paths", nargs=-1, type=click.Path(exists=True))
885
+ @click.argument("paths", nargs=-1, type=click.Path())
861
886
  @click.option("--config", "-c", "config_file", type=click.Path(), help="Path to config file")
862
887
  @format_option
863
888
  @click.option("--min-lines", type=int, help="Override min duplicate lines threshold")
@@ -943,6 +968,7 @@ def _execute_dry_lint( # pylint: disable=too-many-arguments,too-many-positional
943
968
  path_objs, config_file, format, min_lines, no_cache, clear_cache, recursive, verbose
944
969
  ):
945
970
  """Execute DRY linting."""
971
+ _validate_paths_exist(path_objs)
946
972
  orchestrator = _setup_dry_orchestrator(path_objs, config_file, verbose)
947
973
  _apply_dry_config_override(orchestrator, min_lines, no_cache, verbose)
948
974
 
src/config.py CHANGED
@@ -34,6 +34,10 @@ class ConfigError(Exception):
34
34
  """Configuration-related errors."""
35
35
 
36
36
 
37
+ # Default configuration constants
38
+ DEFAULT_MAX_RETRIES = 3
39
+ DEFAULT_TIMEOUT_SECONDS = 30
40
+
37
41
  # Default configuration values
38
42
  DEFAULT_CONFIG: dict[str, Any] = {
39
43
  "app_name": "{{PROJECT_NAME}}",
@@ -41,8 +45,8 @@ DEFAULT_CONFIG: dict[str, Any] = {
41
45
  "log_level": "INFO",
42
46
  "output_format": "text",
43
47
  "greeting": "Hello",
44
- "max_retries": 3,
45
- "timeout": 30,
48
+ "max_retries": DEFAULT_MAX_RETRIES,
49
+ "timeout": DEFAULT_TIMEOUT_SECONDS,
46
50
  }
47
51
 
48
52
  # Configuration file search paths (in priority order)
src/core/base.py CHANGED
@@ -8,14 +8,17 @@ Overview: Establishes the contract that all linting plugins must follow through
8
8
  Defines BaseLintRule which all concrete linting rules inherit from, specifying required
9
9
  properties (rule_id, rule_name, description) and the check() method for violation detection.
10
10
  Provides BaseLintContext as the interface for accessing file information during analysis,
11
- exposing file_path, file_content, and language properties. These abstractions enable the
12
- rule registry to discover and instantiate rules dynamically without tight coupling, supporting
13
- the extensible plugin system where new rules can be added by simply placing them in the
14
- appropriate directory structure.
11
+ exposing file_path, file_content, and language properties. Includes MultiLanguageLintRule
12
+ intermediate class implementing template method pattern for language dispatch, eliminating
13
+ code duplication across multi-language linters (nesting, srp, magic_numbers). These
14
+ abstractions enable the rule registry to discover and instantiate rules dynamically without
15
+ tight coupling, supporting the extensible plugin system where new rules can be added by
16
+ simply placing them in the appropriate directory structure.
15
17
 
16
18
  Dependencies: abc for abstract base class support, pathlib for Path types, Violation from types
17
19
 
18
- Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface)
20
+ Exports: BaseLintRule (abstract rule interface), BaseLintContext (abstract context interface),
21
+ MultiLanguageLintRule (template method base for multi-language linters)
19
22
 
20
23
  Interfaces: BaseLintRule.check(context) -> list[Violation], BaseLintContext properties
21
24
  (file_path, file_content, language), all abstract methods must be implemented by subclasses
@@ -26,6 +29,7 @@ Implementation: ABC-based interface definitions with @abstractmethod decorators,
26
29
 
27
30
  from abc import ABC, abstractmethod
28
31
  from pathlib import Path
32
+ from typing import Any
29
33
 
30
34
  from .types import Violation
31
35
 
@@ -132,3 +136,84 @@ class BaseLintRule(ABC):
132
136
  List of violations found during finalization. Empty list by default.
133
137
  """
134
138
  return []
139
+
140
+
141
+ class MultiLanguageLintRule(BaseLintRule):
142
+ """Base class for linting rules that support multiple programming languages.
143
+
144
+ Provides language dispatch pattern to eliminate code duplication across multi-language
145
+ linters. Subclasses implement language-specific checking methods rather than handling
146
+ dispatch logic themselves.
147
+
148
+ Subclasses must implement:
149
+ - _check_python(context, config) for Python language support
150
+ - _check_typescript(context, config) for TypeScript/JavaScript support
151
+ - _load_config(context) for configuration loading
152
+ """
153
+
154
+ def check(self, context: BaseLintContext) -> list[Violation]:
155
+ """Check for violations with automatic language dispatch.
156
+
157
+ Dispatches to language-specific checking methods based on context.language.
158
+ Handles common patterns like file content validation and config loading.
159
+
160
+ Args:
161
+ context: Lint context with file information
162
+
163
+ Returns:
164
+ List of violations found
165
+ """
166
+ from .linter_utils import has_file_content
167
+
168
+ if not has_file_content(context):
169
+ return []
170
+
171
+ config = self._load_config(context)
172
+ if not config.enabled:
173
+ return []
174
+
175
+ if context.language == "python":
176
+ return self._check_python(context, config)
177
+
178
+ if context.language in ("typescript", "javascript"):
179
+ return self._check_typescript(context, config)
180
+
181
+ return []
182
+
183
+ @abstractmethod
184
+ def _load_config(self, context: BaseLintContext) -> Any:
185
+ """Load configuration from context.
186
+
187
+ Args:
188
+ context: Lint context
189
+
190
+ Returns:
191
+ Configuration object with at minimum an 'enabled' attribute
192
+ """
193
+ raise NotImplementedError("Subclasses must implement _load_config")
194
+
195
+ @abstractmethod
196
+ def _check_python(self, context: BaseLintContext, config: Any) -> list[Violation]:
197
+ """Check Python code for violations.
198
+
199
+ Args:
200
+ context: Lint context with Python file information
201
+ config: Loaded configuration
202
+
203
+ Returns:
204
+ List of violations found in Python code
205
+ """
206
+ raise NotImplementedError("Subclasses must implement _check_python")
207
+
208
+ @abstractmethod
209
+ def _check_typescript(self, context: BaseLintContext, config: Any) -> list[Violation]:
210
+ """Check TypeScript/JavaScript code for violations.
211
+
212
+ Args:
213
+ context: Lint context with TypeScript/JavaScript file information
214
+ config: Loaded configuration
215
+
216
+ Returns:
217
+ List of violations found in TypeScript/JavaScript code
218
+ """
219
+ raise NotImplementedError("Subclasses must implement _check_typescript")
@@ -23,6 +23,9 @@ from abc import ABC, abstractmethod
23
23
  from pathlib import Path
24
24
  from typing import Protocol
25
25
 
26
+ # Default filter threshold constants
27
+ DEFAULT_KEYWORD_ARG_THRESHOLD = 0.8
28
+
26
29
 
27
30
  class CodeBlock(Protocol):
28
31
  """Protocol for code blocks (matches cache.CodeBlock)."""
@@ -67,7 +70,7 @@ class KeywordArgumentFilter(BaseBlockFilter):
67
70
  These are common in builder patterns and API calls.
68
71
  """
69
72
 
70
- def __init__(self, threshold: float = 0.8):
73
+ def __init__(self, threshold: float = DEFAULT_KEYWORD_ARG_THRESHOLD):
71
74
  """Initialize filter.
72
75
 
73
76
  Args:
@@ -256,7 +259,7 @@ def create_default_registry() -> BlockFilterRegistry:
256
259
  registry = BlockFilterRegistry()
257
260
 
258
261
  # Register built-in filters
259
- registry.register(KeywordArgumentFilter(threshold=0.8))
262
+ registry.register(KeywordArgumentFilter(threshold=DEFAULT_KEYWORD_ARG_THRESHOLD))
260
263
  registry.register(ImportGroupFilter())
261
264
 
262
265
  return registry
src/linters/dry/cache.py CHANGED
@@ -1,26 +1,27 @@
1
1
  """
2
- Purpose: SQLite cache manager for DRY linter with mtime-based invalidation
2
+ Purpose: SQLite storage manager for DRY linter duplicate detection
3
3
 
4
- Scope: Code block storage, cache operations, and duplicate detection queries
4
+ Scope: Code block storage and duplicate detection queries
5
5
 
6
- Overview: Implements persistent caching layer for duplicate code detection using SQLite database.
7
- Stores code blocks with hash values, file locations, and metadata. Provides mtime-based cache
8
- invalidation to detect stale entries. Serves dual purpose as both cache (avoid re-hashing) and
9
- hash table (query duplicates across project). Includes indexes for fast hash lookups enabling
10
- cross-file duplicate detection with minimal overhead.
6
+ Overview: Implements in-memory or temporary-file SQLite storage for duplicate code detection.
7
+ Stores code blocks with hash values, file locations, and metadata during a single linter run.
8
+ Supports both :memory: mode (fast, RAM-only) and tempfile mode (disk-backed for large projects).
9
+ No persistence between runs - storage is cleared when linter completes. Includes indexes for
10
+ fast hash lookups enabling cross-file duplicate detection with minimal overhead.
11
11
 
12
- Dependencies: Python sqlite3 module (stdlib), pathlib.Path, dataclasses
12
+ Dependencies: Python sqlite3 module (stdlib), tempfile module (stdlib), pathlib.Path, dataclasses
13
13
 
14
14
  Exports: CodeBlock dataclass, DRYCache class
15
15
 
16
- Interfaces: DRYCache.__init__, is_fresh, load, save, find_duplicates_by_hash, get_blocks_for_file,
17
- add_blocks, cleanup_stale, close
16
+ Interfaces: DRYCache.__init__(storage_mode), add_blocks(file_path, blocks),
17
+ find_duplicates_by_hash(hash_value), get_duplicate_hashes(), close()
18
18
 
19
19
  Implementation: SQLite with two tables (files, code_blocks), indexed on hash_value for performance,
20
- ACID transactions for reliability, foreign key constraints for data integrity
20
+ storage_mode determines :memory: vs tempfile location, ACID transactions for reliability
21
21
  """
22
22
 
23
23
  import sqlite3
24
+ import tempfile
24
25
  from dataclasses import dataclass
25
26
  from pathlib import Path
26
27
 
@@ -39,20 +40,32 @@ class CodeBlock:
39
40
 
40
41
 
41
42
  class DRYCache:
42
- """SQLite-backed cache for duplicate detection."""
43
+ """SQLite-backed storage for duplicate detection."""
43
44
 
44
45
  SCHEMA_VERSION = 1
45
46
 
46
- def __init__(self, cache_path: Path) -> None:
47
- """Initialize cache with SQLite database.
47
+ def __init__(self, storage_mode: str = "memory") -> None:
48
+ """Initialize storage with SQLite database.
48
49
 
49
50
  Args:
50
- cache_path: Path to SQLite database file
51
+ storage_mode: Storage mode - "memory" (default) or "tempfile"
51
52
  """
52
- # Ensure parent directory exists
53
- cache_path.parent.mkdir(parents=True, exist_ok=True)
53
+ self._storage_mode = storage_mode
54
+ self._tempfile = None
55
+
56
+ # Create SQLite connection based on storage mode
57
+ if storage_mode == "memory":
58
+ self.db = sqlite3.connect(":memory:")
59
+ elif storage_mode == "tempfile":
60
+ # Create temporary file that auto-deletes on close
61
+ # pylint: disable=consider-using-with
62
+ # Justification: tempfile must remain open for SQLite connection lifetime.
63
+ # It is explicitly closed in close() method when cache is finalized.
64
+ self._tempfile = tempfile.NamedTemporaryFile(suffix=".db", delete=True)
65
+ self.db = sqlite3.connect(self._tempfile.name)
66
+ else:
67
+ raise ValueError(f"Invalid storage_mode: {storage_mode}")
54
68
 
55
- self.db = sqlite3.connect(str(cache_path))
56
69
  self._query_service = CacheQueryService()
57
70
 
58
71
  # Create schema
@@ -82,68 +95,24 @@ class DRYCache:
82
95
 
83
96
  self.db.commit()
84
97
 
85
- def is_fresh(self, file_path: Path, current_mtime: float) -> bool:
86
- """Check if cached data is fresh (mtime matches).
98
+ def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
99
+ """Add code blocks to storage.
87
100
 
88
101
  Args:
89
- file_path: Path to file
90
- current_mtime: Current modification time
91
-
92
- Returns:
93
- True if cache is fresh, False if stale or missing
94
- """
95
- cursor = self.db.execute("SELECT mtime FROM files WHERE file_path = ?", (str(file_path),))
96
- row = cursor.fetchone()
97
-
98
- if not row:
99
- return False # Not in cache
100
-
101
- cached_mtime = row[0]
102
- return cached_mtime == current_mtime
103
-
104
- def load(self, file_path: Path) -> list[CodeBlock]:
105
- """Load cached code blocks for file.
106
-
107
- Args:
108
- file_path: Path to file
109
-
110
- Returns:
111
- List of CodeBlock instances from cache
102
+ file_path: Path to source file
103
+ blocks: List of CodeBlock instances to store
112
104
  """
113
- cursor = self.db.execute(
114
- """SELECT hash_value, start_line, end_line, snippet
115
- FROM code_blocks
116
- WHERE file_path = ?""",
117
- (str(file_path),),
118
- )
119
-
120
- blocks = []
121
- for hash_val, start, end, snippet in cursor:
122
- block = CodeBlock(
123
- file_path=file_path,
124
- start_line=start,
125
- end_line=end,
126
- snippet=snippet,
127
- hash_value=hash_val,
128
- )
129
- blocks.append(block)
130
-
131
- return blocks
132
-
133
- def save(self, file_path: Path, mtime: float, blocks: list[CodeBlock]) -> None:
134
- """Save code blocks to cache.
135
-
136
- Args:
137
- file_path: Path to file
138
- mtime: File modification time
139
- blocks: List of CodeBlock instances to cache
140
- """
141
- # Delete old data for this file
142
- self.db.execute("DELETE FROM files WHERE file_path = ?", (str(file_path),))
105
+ if not blocks:
106
+ return
143
107
 
144
108
  # Insert file metadata
109
+ try:
110
+ mtime = file_path.stat().st_mtime
111
+ except OSError:
112
+ mtime = 0.0 # File doesn't exist, use placeholder
113
+
145
114
  self.db.execute(
146
- "INSERT INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
115
+ "INSERT OR REPLACE INTO files (file_path, mtime, hash_count) VALUES (?, ?, ?)",
147
116
  (str(file_path), mtime, len(blocks)),
148
117
  )
149
118
 
@@ -164,23 +133,6 @@ class DRYCache:
164
133
 
165
134
  self.db.commit()
166
135
 
167
- def cleanup_stale(self, max_age_days: int) -> None:
168
- """Remove cache entries older than max_age_days.
169
-
170
- Args:
171
- max_age_days: Maximum age in days for cache entries
172
- """
173
- # Use parameterized query to prevent SQL injection
174
- self.db.execute(
175
- """DELETE FROM files
176
- WHERE last_scanned < datetime('now', ? || ' days')""",
177
- (f"-{max_age_days}",),
178
- )
179
-
180
- # Vacuum to reclaim space
181
- self.db.execute("VACUUM")
182
- self.db.commit()
183
-
184
136
  def find_duplicates_by_hash(self, hash_value: int) -> list[CodeBlock]:
185
137
  """Find all code blocks with the given hash value.
186
138
 
@@ -214,5 +166,7 @@ class DRYCache:
214
166
  return self._query_service.get_duplicate_hashes(self.db)
215
167
 
216
168
  def close(self) -> None:
217
- """Close database connection."""
169
+ """Close database connection and cleanup tempfile if used."""
218
170
  self.db.close()
171
+ if self._tempfile:
172
+ self._tempfile.close()
src/linters/dry/config.py CHANGED
@@ -20,6 +20,10 @@ Implementation: Dataclass with field defaults, __post_init__ validation, and dic
20
20
  from dataclasses import dataclass, field
21
21
  from typing import Any
22
22
 
23
+ # Default configuration constants
24
+ DEFAULT_MIN_DUPLICATE_LINES = 3
25
+ DEFAULT_MIN_DUPLICATE_TOKENS = 30
26
+
23
27
 
24
28
  @dataclass
25
29
  class DRYConfig: # pylint: disable=too-many-instance-attributes
@@ -27,14 +31,14 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
27
31
 
28
32
  Note: Pylint too-many-instance-attributes disabled. This is a configuration
29
33
  dataclass serving as a data container for related DRY linter settings.
30
- All 12 attributes are cohesively related (detection thresholds, language
31
- overrides, caching, filtering). Splitting would reduce cohesion and make
34
+ All attributes are cohesively related (detection thresholds, language
35
+ overrides, storage mode, filtering). Splitting would reduce cohesion and make
32
36
  configuration loading more complex without meaningful benefit.
33
37
  """
34
38
 
35
39
  enabled: bool = False # Must be explicitly enabled
36
- min_duplicate_lines: int = 3
37
- min_duplicate_tokens: int = 30
40
+ min_duplicate_lines: int = DEFAULT_MIN_DUPLICATE_LINES
41
+ min_duplicate_tokens: int = DEFAULT_MIN_DUPLICATE_TOKENS
38
42
  min_occurrences: int = 2 # Minimum occurrences to report (default: 2)
39
43
 
40
44
  # Language-specific overrides
@@ -42,10 +46,8 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
42
46
  typescript_min_occurrences: int | None = None
43
47
  javascript_min_occurrences: int | None = None
44
48
 
45
- # Cache settings
46
- cache_enabled: bool = True # ON by default for performance
47
- cache_path: str = ".thailint-cache/dry.db"
48
- cache_max_age_days: int = 30
49
+ # Storage settings
50
+ storage_mode: str = "memory" # Options: "memory" (default) or "tempfile"
49
51
 
50
52
  # Ignore patterns
51
53
  ignore_patterns: list[str] = field(default_factory=lambda: ["tests/", "__init__.py"])
@@ -70,6 +72,10 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
70
72
  )
71
73
  if self.min_occurrences <= 0:
72
74
  raise ValueError(f"min_occurrences must be positive, got {self.min_occurrences}")
75
+ if self.storage_mode not in ("memory", "tempfile"):
76
+ raise ValueError(
77
+ f"storage_mode must be 'memory' or 'tempfile', got '{self.storage_mode}'"
78
+ )
73
79
 
74
80
  def get_min_occurrences_for_language(self, language: str) -> int:
75
81
  """Get minimum occurrences threshold for a specific language.
@@ -116,15 +122,13 @@ class DRYConfig: # pylint: disable=too-many-instance-attributes
116
122
 
117
123
  return cls(
118
124
  enabled=config.get("enabled", False),
119
- min_duplicate_lines=config.get("min_duplicate_lines", 3),
120
- min_duplicate_tokens=config.get("min_duplicate_tokens", 30),
125
+ min_duplicate_lines=config.get("min_duplicate_lines", DEFAULT_MIN_DUPLICATE_LINES),
126
+ min_duplicate_tokens=config.get("min_duplicate_tokens", DEFAULT_MIN_DUPLICATE_TOKENS),
121
127
  min_occurrences=config.get("min_occurrences", 2),
122
128
  python_min_occurrences=python_config.get("min_occurrences"),
123
129
  typescript_min_occurrences=typescript_config.get("min_occurrences"),
124
130
  javascript_min_occurrences=javascript_config.get("min_occurrences"),
125
- cache_enabled=config.get("cache_enabled", True),
126
- cache_path=config.get("cache_path", ".thailint-cache/dry.db"),
127
- cache_max_age_days=config.get("cache_max_age_days", 30),
131
+ storage_mode=config.get("storage_mode", "memory"),
128
132
  ignore_patterns=config.get("ignore", []),
129
133
  filters=filters,
130
134
  )
@@ -1,12 +1,11 @@
1
1
  """
2
- Purpose: Storage management for duplicate code blocks with cache and memory fallback
2
+ Purpose: Storage management for duplicate code blocks in SQLite
3
3
 
4
- Scope: Manages storage of code blocks in SQLite cache or in-memory dict
4
+ Scope: Manages storage of code blocks in SQLite for duplicate detection
5
5
 
6
- Overview: Provides unified storage interface for code blocks supporting both SQLite-backed caching
7
- and in-memory fallback when cache disabled. Handles block insertion, retrieval, and duplicate
8
- hash queries. Encapsulates Decision 6 (in-memory fallback) implementation. Separates storage
9
- concerns from linting logic to maintain SRP compliance.
6
+ Overview: Provides storage interface for code blocks using SQLite (in-memory or tempfile mode).
7
+ Handles block insertion and duplicate hash queries. Delegates all storage operations to
8
+ DRYCache SQLite layer. Separates storage concerns from linting logic to maintain SRP compliance.
10
9
 
11
10
  Dependencies: DRYCache, CodeBlock, Path
12
11
 
@@ -15,7 +14,7 @@ Exports: DuplicateStorage class
15
14
  Interfaces: DuplicateStorage.add_blocks(file_path, blocks), get_duplicate_hashes(),
16
15
  get_blocks_for_hash(hash_value)
17
16
 
18
- Implementation: Delegates to either SQLite cache or in-memory dict based on cache_enabled setting
17
+ Implementation: Delegates to SQLite cache for all storage operations
19
18
  """
20
19
 
21
20
  from pathlib import Path
@@ -24,82 +23,36 @@ from .cache import CodeBlock, DRYCache
24
23
 
25
24
 
26
25
  class DuplicateStorage:
27
- """Manages storage of code blocks in cache or memory."""
26
+ """Manages storage of code blocks in SQLite."""
28
27
 
29
- def __init__(self, cache: DRYCache | None) -> None:
30
- """Initialize storage with optional cache.
28
+ def __init__(self, cache: DRYCache) -> None:
29
+ """Initialize storage with SQLite cache.
31
30
 
32
31
  Args:
33
- cache: SQLite cache instance (None for in-memory mode)
32
+ cache: SQLite cache instance (in-memory or tempfile mode)
34
33
  """
35
34
  self._cache = cache
36
- self._memory_store: dict[int, list[CodeBlock]] = {}
37
35
 
38
36
  def add_blocks(self, file_path: Path, blocks: list[CodeBlock]) -> None:
39
- """Add code blocks to storage and cache.
37
+ """Add code blocks to SQLite storage.
40
38
 
41
39
  Args:
42
40
  file_path: Path to source file
43
41
  blocks: List of code blocks to store
44
42
  """
45
- # Always add to memory for duplicate detection
46
- self._add_to_memory(blocks)
47
-
48
- # Also persist to cache if available
49
- if self._cache:
50
- self._add_to_cache(file_path, blocks)
51
-
52
- def add_blocks_to_memory(self, file_path: Path, blocks: list[CodeBlock]) -> None:
53
- """Add code blocks to in-memory storage only (for cache hits).
54
-
55
- Args:
56
- file_path: Path to source file (used for cache persistence check)
57
- blocks: List of code blocks to store
58
- """
59
- # Add to memory for duplicate detection this run
60
- self._add_to_memory(blocks)
61
-
62
- # Guard clauses - early returns for skip conditions
63
- if not self._cache:
64
- return
65
-
66
- if not blocks:
67
- return
68
-
69
- # Update cache with new blocks if needed (for fresh analysis)
70
- self._update_cache_if_fresh(file_path, blocks)
71
-
72
- def _update_cache_if_fresh(self, file_path: Path, blocks: list[CodeBlock]) -> None:
73
- """Update cache if file analysis is fresh (not from cache).
74
-
75
- Args:
76
- file_path: Path to source file
77
- blocks: List of code blocks to store
78
- """
79
- if not self._cache:
80
- return
81
-
82
- try:
83
- mtime = file_path.stat().st_mtime
84
- except OSError:
85
- # File doesn't exist, skip cache
86
- return
87
-
88
- # File was analyzed (not cached), so persist if not fresh
89
- if not self._cache.is_fresh(file_path, mtime):
90
- self._add_to_cache(file_path, blocks)
43
+ if blocks:
44
+ self._cache.add_blocks(file_path, blocks)
91
45
 
92
46
  def get_duplicate_hashes(self) -> list[int]:
93
- """Get all hash values with 2+ occurrences from memory.
47
+ """Get all hash values with 2+ occurrences from SQLite.
94
48
 
95
49
  Returns:
96
50
  List of hash values that appear in multiple blocks
97
51
  """
98
- # Always query from in-memory store for this run's files
99
- return [h for h, blocks in self._memory_store.items() if len(blocks) >= 2]
52
+ return self._cache.get_duplicate_hashes()
100
53
 
101
54
  def get_blocks_for_hash(self, hash_value: int) -> list[CodeBlock]:
102
- """Get all blocks with given hash value from memory.
55
+ """Get all blocks with given hash value from SQLite.
103
56
 
104
57
  Args:
105
58
  hash_value: Hash to search for
@@ -107,20 +60,4 @@ class DuplicateStorage:
107
60
  Returns:
108
61
  List of code blocks with this hash
109
62
  """
110
- # Always query from in-memory store for this run's files
111
- return self._memory_store.get(hash_value, [])
112
-
113
- def _add_to_cache(self, file_path: Path, blocks: list[CodeBlock]) -> None:
114
- """Add blocks to SQLite cache."""
115
- if not self._cache or not blocks:
116
- return
117
-
118
- mtime = file_path.stat().st_mtime
119
- self._cache.save(file_path, mtime, blocks)
120
-
121
- def _add_to_memory(self, blocks: list[CodeBlock]) -> None:
122
- """Add blocks to in-memory store."""
123
- for block in blocks:
124
- if block.hash_value not in self._memory_store:
125
- self._memory_store[block.hash_value] = []
126
- self._memory_store[block.hash_value].append(block)
63
+ return self._cache.find_duplicates_by_hash(hash_value)